/*
   Copyright (c) 2012, 2019, Oracle and/or its affiliates. All rights reserved.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License, version 2.0,
   as published by the Free Software Foundation.

   This program is also distributed with certain software (including
   but not limited to OpenSSL) that is licensed under separate terms,
   as designated in a particular file or component or in included license
   documentation. The authors of MySQL hereby grant you an additional
   permission to link the program and your derivative works with the
   separately licensed software that they have included with MySQL.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License, version 2.0, for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/

#include <cstdint>
#include <NDBT.hpp>
#include <NDBT_Test.hpp>
#include <HugoOperations.hpp>
#include <NdbRestarter.hpp>
#include <mgmapi.h>
#include <ndb_logevent.h>
#include <NdbTick.h>
#include <NDBT_Stats.hpp>
#include <random.h>
#include <NdbMgmd.hpp>
#include "../../src/ndbapi/NdbInfo.hpp"
#include <signaldata/DumpStateOrd.hpp>

static NdbMutex* g_msgmutex = 0;

#undef require
#define require(b) \
  if (!(b)) { \
    NdbMutex_Lock(g_msgmutex); \
    g_err << "ABORT: " << #b << " failed at line " << __LINE__ << endl; \
    NdbMutex_Unlock(g_msgmutex); \
    abort(); \
  }

#define CHK1(b) \
  if (!(b)) { \
    NdbMutex_Lock(g_msgmutex); \
    g_err << "ERROR: " << #b << " failed at line " << __LINE__ << endl; \
    NdbMutex_Unlock(g_msgmutex); \
    result = NDBT_FAILED; \
    break; \
  }

#define CHK2(b, e) \
  if (!(b)) { \
    NdbMutex_Lock(g_msgmutex); \
    g_err << "ERROR: " << #b << " failed at line " << __LINE__ \
          << ": " << e << endl; \
    NdbMutex_Unlock(g_msgmutex); \
    result = NDBT_FAILED; \
    break; \
  }

#define CHK3(b, e) \
  if (!(b)) { \
    NdbMutex_Lock(g_msgmutex); \
    g_err << "ERROR: " << #b << " failed at line " << __LINE__ \
          << ": " << e << endl; \
    NdbMutex_Unlock(g_msgmutex); \
    return NDBT_FAILED; \
  }

#define info(x) \
  do { \
    NdbMutex_Lock(g_msgmutex); \
    g_info << x << endl; \
    NdbMutex_Unlock(g_msgmutex); \
  } while (0)
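
/*
 * Usage convention (a minimal illustrative sketch, not itself part of a
 * test): CHK1/CHK2 assume a local `int result` and an enclosing loop to
 * break out of, while CHK3 returns NDBT_FAILED directly.
 *
 *   int result = NDBT_OK;
 *   do
 *   {
 *     CHK2(pDic->createTable(tab) == 0, pDic->getNdbError());
 *   }
 *   while (0);
 *   return result;
 */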

static const int g_tabmax = 3;
static const char* g_tabname[g_tabmax] = { "T1", "T2", "T4" };
static const NdbDictionary::Table* g_tabptr[g_tabmax] = { 0, 0, 0 };

static int
runCreate(NDBT_Context* ctx, NDBT_Step* step)
{
  Ndb* pNdb = GETNDB(step);
  NdbDictionary::Dictionary* pDic = pNdb->getDictionary();
  int result = NDBT_OK;
  int tabmask = ctx->getProperty("TABMASK", (Uint32)0);

  for (int i = 0; i < g_tabmax; i++)
  {
    if (!(tabmask & (1 << i)))
      continue;
    const char* tabname = g_tabname[i];
    (void)pDic->dropTable(tabname);

    const NdbDictionary::Table* pTab = NDBT_Tables::getTable(tabname);
    require(pTab != 0);
    NdbDictionary::Table tab2(*pTab);
    // make sure to hit all log parts
    tab2.setFragmentType(NdbDictionary::Object::FragAllLarge);
    //tab2.setMaxRows(100000000);
    CHK2(pDic->createTable(tab2) == 0, pDic->getNdbError());

    g_tabptr[i] = pDic->getTable(tabname);
    require(g_tabptr[i] != 0);
    info("created " << tabname);
  }

  return result;
}

static int
runDrop(NDBT_Context* ctx, NDBT_Step* step)
{
  Ndb* pNdb = GETNDB(step);
  NdbDictionary::Dictionary* pDic = pNdb->getDictionary();
  int result = NDBT_OK;
  int tabmask = ctx->getProperty("TABMASK", (Uint32)0);

  for (int i = 0; i < g_tabmax; i++)
  {
    if (!(tabmask & (1 << i)))
      continue;
    const char* tabname = g_tabname[i];
    if (g_tabptr[i] != 0)
    {
      CHK2(pDic->dropTable(tabname) == 0, pDic->getNdbError());
      g_tabptr[i] = 0;
      info("dropped " << tabname);
    }
  }

  return result;
}

// 0-writer has not seen 410 error
// 1-writer sees 410 error
// 2-longtrans has rolled back

static int
get_err410(NDBT_Context* ctx)
{
  int v = (int)ctx->getProperty("ERR410", (Uint32)0);
  require(v == 0 || v == 1 || v == 2);
  return v;
}

static void
set_err410(NDBT_Context* ctx, int v)
{
  require(v == 0 || v == 1 || v == 2);
  require(get_err410(ctx) != v);
  ctx->setProperty("ERR410", (Uint32)v);
}
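
/*
 * The ERR410 property cycles 0 -> 1 -> 2 -> 0: the writer sets 1 when a
 * commit fails with error 410 (redo log files overloaded), the long
 * transaction then rolls back and sets 2, and the writer sets 0 again
 * once a write succeeds.  set_err410() requires every transition to
 * actually change the value, so a stuck thread is caught early.
 */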

static int
runLongtrans(NDBT_Context* ctx, NDBT_Step* step)
{
  Ndb* pNdb = GETNDB(step);
  int result = NDBT_OK;
  const int sleep410 = ctx->getProperty("SLEEP410", (Uint32)0);

  const NdbDictionary::Table* pTab = g_tabptr[0];
  require(pTab != 0);

  info("longtrans: start");
  int loop = 0;
  while (!ctx->isTestStopped())
  {
    info("longtrans: loop " << loop);
    HugoOperations ops(*pTab);
    ops.setQuiet();
    CHK2(ops.startTransaction(pNdb) == 0, ops.getNdbError());
    CHK2(ops.pkInsertRecord(pNdb, 0, 1, 0) == 0, ops.getNdbError());

    while (!ctx->isTestStopped())
    {
      int v = get_err410(ctx);
      require(v == 0 || v == 1);
      if (v != 0)
      {
        info("longtrans: 410 seen");
        if (sleep410 > 0)
        {
          info("longtrans: sleep " << sleep410);
          sleep(sleep410);
        }

        CHK2(ops.execute_Rollback(pNdb) == 0, ops.getNdbError());
        ops.closeTransaction(pNdb);
        info("longtrans: rollback done");
        set_err410(ctx, 2);

        while (!ctx->isTestStopped())
        {
          if (get_err410(ctx) == 0)
          {
            info("longtrans: 410 cleared");
            break;
          }
          sleep(1);
        }
        break;
      }
      sleep(1);
    }
    CHK1(result == NDBT_OK);

    if (ops.getTransaction() != NULL)
    {
      info("longtrans: close leftover transaction");
      ops.closeTransaction(pNdb);
    }

    loop++;
  }

  info("longtrans: stop");
  return result;
}

static int
run_write_ops(NDBT_Context* ctx, NDBT_Step* step, int upval, NdbError& err, bool abort_on_error = false)
{
  Ndb* pNdb = GETNDB(step);
  const int records = ctx->getNumRecords();
  int result = NDBT_OK;

  const NdbDictionary::Table* pTab = g_tabptr[1];
  require(pTab != 0);

  while (!ctx->isTestStopped())
  {
    HugoOperations ops(*pTab);
    ops.setQuiet();
    CHK2(ops.startTransaction(pNdb) == 0, ops.getNdbError());

    for (int record = 0; record < records; record++)
    {
      CHK2(ops.pkWriteRecord(pNdb, record, 1, upval) == 0, ops.getNdbError());
    }
    CHK1(result == NDBT_OK);

    int ret = ops.execute_Commit(pNdb);
    err = ops.getNdbError();
    ops.closeTransaction(pNdb);

    if (ret == 0)
      break;

    require(err.code != 0);
    CHK2(err.status == NdbError::TemporaryError, err);

    if (abort_on_error)
    {
      g_info << "Temporary error " << err.code << " during write" << endl;
      result = NDBT_FAILED;
      break;
    }

    if (err.code == 410)
      break;

    info("write: continue on " << err);
    NdbSleep_MilliSleep(100);
  }

  return result;
}
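
/*
 * Note on the return convention above: error 410 (redo log files
 * overloaded) is the condition these tests provoke on purpose, so
 * run_write_ops() returns NDBT_OK with err.code == 410 and leaves the
 * reaction to the caller, while other temporary errors are retried.
 * With abort_on_error set, any temporary error (including 410) stops
 * the loop immediately and err carries the code.
 */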

static int
runWriteOK(NDBT_Context* ctx, NDBT_Step* step)
{
  int result = NDBT_OK;

  info("write: start");
  int loop = 0;
  int upval = 0;

  while (!ctx->isTestStopped())
  {
    if (loop % 100 == 0)
      info("write: loop " << loop);

    NdbError err;
    CHK2(run_write_ops(ctx, step, upval++, err) == 0, err);
    require(err.code == 0 || err.code == 410);
    CHK2(err.code == 0, err);
    NdbSleep_MilliSleep(100);

    loop++;
  }

  return result;
}

static int
runWrite410(NDBT_Context* ctx, NDBT_Step* step)
{
  int result = NDBT_OK;
  const int loops = ctx->getNumLoops();

  info("write: start");
  int loop = 0;
  int upval = 0;

  while (loop < loops && !ctx->isTestStopped())
  {
    info("write: loop " << loop);

    while (!ctx->isTestStopped())
    {
      NdbError err;
      CHK2(run_write_ops(ctx, step, upval++, err) == 0, err);
      if (err.code != 0)
      {
        require(err.code == 410);
        info("write: setting 410");
        set_err410(ctx, 1);
        break;
      }
      NdbSleep_MilliSleep(100);
    }

    while (1)
    {
      int v = get_err410(ctx);
      if (v == 2)
      {
        info("write: longtrans rollback seen");
        break;
      }
      require(v == 1);
      sleep(1);
    }

    while (!ctx->isTestStopped())
    {
      NdbError err;
      CHK2(run_write_ops(ctx, step, upval++, err) == 0, err);
      if (err.code == 0)
      {
        info("write: clearing 410");
        set_err410(ctx, 0);
        break;
      }
      require(err.code == 410);
      NdbSleep_MilliSleep(100);
    }

    loop++;
  }

  info("write: stop test");
  ctx->stopTest();
  return result;
}

struct OpLat {
  const int m_op;
  const int m_repeat;
  // first index (2): 0 - 410 off, 1 - 410 on
  // second index (3): 0 - op ok, 1 - op got 410, 2 - op got other temp error
  NDBT_Stats m_lat[2][3];
  OpLat(int op, int repeat) :
    m_op(op),
    m_repeat(repeat) {}
};
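
/*
 * Example reading of the matrix: m_lat[1][2] holds latencies of
 * operations that hit a temporary error other than 410 while the
 * ERR410 flag was raised.  The first index is derived from the shared
 * ERR410 property, with any non-zero state counting as "410 on".
 */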

static void
run_latency_report(const OpLat* oplist, int opcnt)
{
  for (int i = 0; i < opcnt; i++)
  {
    const OpLat& oplat = oplist[i];
    printf("optype: %d\n", oplat.m_op);
    for (int i0 = 0; i0 < 2; i0++)
    {
      printf("410 off/on: %d\n", i0);
      printf("op status ok / 410 / other temp error:\n");
      for (int i1 = 0; i1 < 3; i1++)
      {
        const NDBT_Stats& lat = oplat.m_lat[i0][i1];
        printf("count: %d", lat.getCount());
        if (lat.getCount() > 0)
        {
          printf(" mean: %.2f min: %.2f max: %.2f stddev: %.2f",
                 lat.getMean(), lat.getMin(), lat.getMax(), lat.getStddev());
        }
        printf("\n");
      }
    }
  }
}

static int
run_latency_ops(NDBT_Context* ctx, NDBT_Step* step, OpLat& oplat, int upval, NdbError& err)
{
  Ndb* pNdb = GETNDB(step);
  const int records = ctx->getNumRecords();
  int result = NDBT_OK;

  const NdbDictionary::Table* pTab = g_tabptr[2];
  require(pTab != 0);

  int record = 0;
  while (record < records && !ctx->isTestStopped())
  {
    HugoOperations ops(*pTab);
    ops.setQuiet();

    const NDB_TICKS timer_start = NdbTick_getCurrentTicks();

    CHK2(ops.startTransaction(pNdb) == 0, ops.getNdbError());

    switch (oplat.m_op)
    {
    case NdbOperation::InsertRequest:
      CHK2(ops.pkInsertRecord(pNdb, record, 1, upval) == 0, ops.getNdbError());
      break;
    case NdbOperation::UpdateRequest:
      CHK2(ops.pkUpdateRecord(pNdb, record, 1, upval) == 0, ops.getNdbError());
      break;
    case NdbOperation::ReadRequest:
      CHK2(ops.pkReadRecord(pNdb, record, 1) == 0, ops.getNdbError());
      break;
    case NdbOperation::DeleteRequest:
      CHK2(ops.pkDeleteRecord(pNdb, record, 1) == 0, ops.getNdbError());
      break;
    default:
      require(false);
      break;
    }
    CHK2(result == NDBT_OK, "latency: ndbapi error at op " << oplat.m_op << " record " << record);

    int ret = ops.execute_Commit(pNdb);
    err = ops.getNdbError();
    ops.closeTransaction(pNdb);

    if (ret != 0)
    {
      require(err.code != 0);
      CHK2(err.status == NdbError::TemporaryError, err);
    }

    const NDB_TICKS timer_stop = NdbTick_getCurrentTicks();
    Uint64 tt = NdbTick_Elapsed(timer_start, timer_stop).microSec();
    require(tt > 0);
    double td = (double)tt;
    // map any non-zero ERR410 state to "410 on" (first index range is [0,1])
    int i0 = (get_err410(ctx) == 0 ? 0 : 1);
    int i1 = (ret == 0 ? 0 : (err.code == 410 ? 1 : 2));
    NDBT_Stats& lat = oplat.m_lat[i0][i1];
    lat.addObservation(td);

    if (ret == 0)
      record++;
  }

  return result;
}

static int
runLatency(NDBT_Context* ctx, NDBT_Step* step)
{
  int result = NDBT_OK;

  info("latency: start");
  const int opcnt = 4;
  OpLat oplist[opcnt] = {
    OpLat(NdbOperation::InsertRequest, 1),
    OpLat(NdbOperation::UpdateRequest, 10),
    OpLat(NdbOperation::ReadRequest, 5),
    OpLat(NdbOperation::DeleteRequest, 1)
  };

  int loop = 0;
  int upval = 0;

  while (!ctx->isTestStopped())
  {
    info("latency: loop " << loop);
    for (int i = 0; i < opcnt && !ctx->isTestStopped(); i++)
    {
      OpLat& oplat = oplist[i];
      NdbError err;
      for (int j = 0; j < oplat.m_repeat && !ctx->isTestStopped(); j++)
      {
        CHK2(run_latency_ops(ctx, step, oplat, upval, err) == 0, err);
        upval++;
      }
      CHK1(result == NDBT_OK);
    }
    loop++;
  }

  run_latency_report(oplist, opcnt);
  return result;
}

// gnu bitmap madness
#undef reset
#undef isset

struct LogPos {
  int m_fileno;
  int m_mb;
  int m_pos; // absolute mb
  friend NdbOut& operator<<(NdbOut&, const LogPos&);
};

NdbOut&
operator<<(NdbOut& out, const LogPos& pos)
{
  out << pos.m_fileno;
  out << "." << pos.m_mb;
  out << "-" << pos.m_pos;
  return out;
}

struct LogPart {
  int m_partno; // for print
  bool m_set;
  int m_files; // redo files
  int m_filesize; // mb
  int m_total; // m_files * m_filesize
  int m_free; // mb
  int m_used; // mb
  LogPos m_head;
  LogPos m_tail;
  int m_fileused;
  void reset() {
    m_set = false;
  }
  bool isset() {
    return m_set;
  }
  friend NdbOut& operator<<(NdbOut&, const LogPart&);
};

NdbOut&
operator<<(NdbOut& out, const LogPart& lp)
{
  out << "part " << lp.m_partno << ":";
  out << " files=" << lp.m_files;
  out << " filesize=" << lp.m_filesize;
  out << " total=" << lp.m_total;
  out << " free=" << lp.m_free;
  out << " head: " << lp.m_head;
  out << " tail: " << lp.m_tail;
  out << " fileused=" << lp.m_fileused;
  return out;
}

struct LogNode {
  int m_nodeid;
  LogPart m_logpart[4];
  int m_files; // from LogPart (must be same for all)
  int m_filesize;
  int m_minfds; // min and max FDs in page 0
  int m_maxfds; // LQH uses max FDs by default
  void reset() {
    for (int i = 0; i < 4; i++) {
      m_logpart[i].m_partno = i;
      m_logpart[i].reset();
    }
  }
  bool isset() {
    for (int i = 0; i < 4; i++)
      if (!m_logpart[i].isset())
        return false;
    return true;
  }
};

struct LogInfo {
  int m_nodes;
  LogNode* m_lognode;
  int m_files; // from LogNode (config is same for all in these tests)
  int m_filesize;
  int m_minfds;
  int m_maxfds;
  LogInfo(int nodes) {
    m_nodes = nodes;
    m_lognode = new LogNode [nodes];
    reset();
  }
  ~LogInfo() {
    m_nodes = 0;
    delete [] m_lognode;
  }
  void reset() {
    for (int n = 0; n < m_nodes; n++)
      m_lognode[n].reset();
  }
  bool isset() {
    for (int n = 0; n < m_nodes; n++)
      if (!m_lognode[n].isset())
        return false;
    return true;
  }
  LogNode* findnode(int nodeid) const {
    for (int n = 0; n < m_nodes; n++) {
      if (m_lognode[n].m_nodeid == nodeid)
        return &m_lognode[n];
    }
    return 0;
  }
  void copyto(LogInfo& li2) const {
    require(m_nodes == li2.m_nodes);
    for (int n = 0; n < m_nodes; n++) {
      const LogNode& ln1 = m_lognode[n];
      LogNode& ln2 = li2.m_lognode[n];
      ln2 = ln1;
    }
  }
};

static int
get_nodestatus(NdbMgmHandle h, LogInfo& li)
{
  int result = 0;
  ndb_mgm_cluster_state* cs = 0;

  do
  {
    require(h != 0);
    CHK2((cs = ndb_mgm_get_status(h)) != 0, ndb_mgm_get_latest_error_msg(h));
    int n = 0;
    for (int i = 0; i < cs->no_of_nodes; i++)
    {
      ndb_mgm_node_state& ns = cs->node_states[i];
      if (ns.node_type == NDB_MGM_NODE_TYPE_NDB)
      {
        // called only when all started
        CHK1(ns.node_status == NDB_MGM_NODE_STATUS_STARTED);
        CHK1(n < li.m_nodes);

        LogNode& ln = li.m_lognode[n];
        ln.m_nodeid = ns.node_id;
        info("node " << n << ": " << ln.m_nodeid);
        n++;
      }
      CHK1(result == 0);
    }
    CHK1(n == li.m_nodes);
  }
  while (0);

  free(cs);
  info("get_nodestatus result=" << result);
  return result;
}

static int
get_redostatus(NdbMgmHandle h, LogInfo& li)
{
  int result = 0;

  do
  {
    li.reset();
    require(h != 0);

    int filter[] = { 15, NDB_MGM_EVENT_CATEGORY_CHECKPOINT, 0 };
    NdbLogEventHandle evh = 0;
    CHK2((evh = ndb_mgm_create_logevent_handle(h, filter)) != 0, ndb_mgm_get_latest_error_msg(h));

    for (int n = 0; n < li.m_nodes; n++)
    {
      int dump[] = { 2399 };
      const LogNode& ln = li.m_lognode[n];
      struct ndb_mgm_reply reply;
      CHK2(ndb_mgm_dump_state(h, ln.m_nodeid, dump, 1, &reply) == 0, ndb_mgm_get_latest_error_msg(h));
    }
    CHK1(result == 0);

    int maxcnt = 4 * li.m_nodes;
    int rescnt = 0;
    time_t start = time(0);
    int maxwait = 5;

    while (rescnt < maxcnt && time(0) < start + maxwait)
    {
      while (1)
      {
        int res;
        ndb_logevent ev;
        int msec = 100;
        CHK2((res = ndb_logevent_get_next(evh, &ev, msec)) >= 0, ndb_mgm_get_latest_error_msg(h));
        if (res == 0)
          break;
        if (ev.type != NDB_LE_RedoStatus)
          continue;

        LogNode* lnptr = 0;
        CHK2((lnptr = li.findnode(ev.source_nodeid)) != 0, "unknown nodeid " << ev.source_nodeid);
        LogNode& ln = *lnptr;

        const ndb_logevent_RedoStatus& rs = ev.RedoStatus;
        CHK1(rs.log_part < 4);
        LogPart& lp = ln.m_logpart[rs.log_part];

        CHK1(!lp.m_set);
        LogPos& head = lp.m_head;
        LogPos& tail = lp.m_tail;
        lp.m_files = rs.no_logfiles;
        lp.m_filesize = rs.logfilesize;
        head.m_fileno = rs.head_file_no;
        head.m_mb = rs.head_mbyte;
        head.m_pos = head.m_fileno * lp.m_filesize + head.m_mb;
        tail.m_fileno = rs.tail_file_no;
        tail.m_mb = rs.tail_mbyte;
        tail.m_pos = tail.m_fileno * lp.m_filesize + tail.m_mb;
        CHK1(rs.total_hi == 0 && rs.total_lo < (1u << 31));
        lp.m_total = rs.total_lo;
        CHK1(rs.free_hi == 0 && rs.free_lo < (1u << 31));
        lp.m_free = rs.free_lo;
        lp.m_used = lp.m_total - lp.m_free;

        // set number of files used
        if (tail.m_fileno < head.m_fileno)
        {
          lp.m_fileused = head.m_fileno - tail.m_fileno + 1;
        }
        else if (tail.m_fileno > head.m_fileno)
        {
          lp.m_fileused = lp.m_files - (tail.m_fileno - head.m_fileno - 1);
        }
        else if (tail.m_pos < head.m_pos)
        {
          lp.m_fileused = 1;
        }
        else if (tail.m_pos > head.m_pos)
        {
          lp.m_fileused = lp.m_files;
        }
        else
        {
          lp.m_fileused = 0;
        }
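
        /*
         * Worked example of the wrap-around file count (illustrative
         * numbers): with m_files = 16, head in file 2 and tail in file
         * 14, the redo spans files 14, 15, 0, 1, 2, so
         * m_fileused = 16 - (14 - 2 - 1) = 5.
         */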

        // sanity checks
        {
          CHK2(lp.m_total == lp.m_files * lp.m_filesize, lp);
          CHK2(head.m_fileno < lp.m_files, lp);
          CHK2(head.m_mb < lp.m_filesize, lp);
          require(head.m_pos < lp.m_total);
          CHK2(tail.m_fileno < lp.m_files, lp);
          CHK2(tail.m_mb < lp.m_filesize, lp);
          require(tail.m_pos < lp.m_total);
          CHK2(lp.m_free <= lp.m_total, lp);
          if (tail.m_pos <= head.m_pos)
          {
            CHK2(lp.m_free == lp.m_total - (head.m_pos - tail.m_pos), lp);
          }
          else
          {
            CHK2(lp.m_free == tail.m_pos - head.m_pos, lp);
          }
        }
        lp.m_set = true;
        //info("node " << ln.m_nodeid << ": " << lp);

        rescnt++;
      }
      CHK1(result == 0);
    }
    CHK1(result == 0);
    CHK2(rescnt == maxcnt, "got events " << rescnt << " != " << maxcnt);
    require(li.isset()); // already implied by counts

    for (int n = 0; n < li.m_nodes; n++)
    {
      LogNode& ln = li.m_lognode[n];
      for (int i = 0; i < 4; i++)
      {
        LogPart& lp = ln.m_logpart[i];
        if (i == 0)
        {
          ln.m_files = lp.m_files;
          ln.m_filesize = lp.m_filesize;
          CHK1(ln.m_files >= 3 && ln.m_filesize >= 4);

          // see Dblqh::execREAD_CONFIG_REQ()
          ln.m_minfds = 2;
          ln.m_maxfds = (8192 - 32 - 128) / (3 * ln.m_filesize);
          if (ln.m_maxfds > 40)
            ln.m_maxfds = 40;
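          /*
           * Illustrative arithmetic: with the 16 MB files these tests
           * configure, (8192 - 32 - 128) / (3 * 16) = 8032 / 48 = 167,
           * so the cap of 40 applies; only very large files push the
           * computed value below the cap.
           */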
          CHK1(ln.m_minfds <= ln.m_maxfds);
        }
        else
        {
          CHK1(ln.m_files == lp.m_files && ln.m_filesize == lp.m_filesize);
        }
      }

      if (n == 0)
      {
        li.m_files = ln.m_files;
        li.m_filesize = ln.m_filesize;
        li.m_minfds = ln.m_minfds;
        li.m_maxfds = ln.m_maxfds;
        require(li.m_files > 0 && li.m_filesize > 0);
        require(li.m_minfds <= li.m_maxfds);
      }
      else
      {
        CHK1(li.m_files == ln.m_files && li.m_filesize == ln.m_filesize);
        require(li.m_minfds == ln.m_minfds && li.m_maxfds == ln.m_maxfds);
      }

      CHK1(result == 0);
    }
    CHK1(result == 0);

    ndb_mgm_destroy_logevent_handle(&evh);
  }
  while (0);

  info("get_redostatus result=" << result);
  return result;
}

// get node with max redo files used in some part

struct LogUsed {
  int m_nodeidx;
  int m_nodeid;
  int m_partno;
  int m_used; // mb
  LogPos m_head;
  LogPos m_tail;
  int m_fileused;
  int m_rand; // randomize node to restart if file usage is same
  friend NdbOut& operator<<(NdbOut&, const LogUsed&);
};

NdbOut&
operator<<(NdbOut& out, const LogUsed& lu)
{
  out << "n=" << lu.m_nodeid;
  out << " p=" << lu.m_partno;
  out << " u=" << lu.m_used;
  out << " h=" << lu.m_head;
  out << " t=" << lu.m_tail;
  out << " f=" << lu.m_fileused;
  return out;
}

static int
cmp_logused(const void* a1, const void* a2)
{
  const LogUsed& lu1 = *(const LogUsed*)a1;
  const LogUsed& lu2 = *(const LogUsed*)a2;
  int k = lu1.m_fileused - lu2.m_fileused;
  if (k != 0)
  {
    // sorting by larger file usage
    return (-1) * k;
  }
  return lu1.m_rand - lu2.m_rand;
}
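
/*
 * cmp_logused() orders entries by descending m_fileused; ties fall back
 * to the pre-drawn m_rand value, so that when several nodes show the
 * same file usage, the node picked for restart varies between runs.
 */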

struct LogMax {
  int m_nodes;
  LogUsed* m_logused;
  LogMax(int nodes) {
    m_nodes = nodes;
    m_logused = new LogUsed [nodes];
  }
  ~LogMax() {
    m_nodes = 0;
    delete [] m_logused;
  }
};

static void
get_redoused(const LogInfo& li, LogMax& lx)
{
  require(li.m_nodes == lx.m_nodes);
  for (int n = 0; n < li.m_nodes; n++)
  {
    const LogNode& ln = li.m_lognode[n];
    LogUsed& lu = lx.m_logused[n];
    lu.m_used = -1;
    for (int i = 0; i < 4; i++)
    {
      const LogPart& lp = ln.m_logpart[i];
      if (lu.m_used < lp.m_used)
      {
        lu.m_nodeidx = n;
        lu.m_nodeid = ln.m_nodeid;
        lu.m_partno = i;
        lu.m_used = lp.m_used;
        lu.m_head = lp.m_head;
        lu.m_tail = lp.m_tail;
        lu.m_fileused = lp.m_fileused;
        lu.m_rand = myRandom48(100);
      }
    }
  }
  qsort(lx.m_logused, lx.m_nodes, sizeof(LogUsed), cmp_logused);
  for (int n = 0; n + 1 < li.m_nodes; n++)
  {
    const LogUsed& lu1 = lx.m_logused[n];
    const LogUsed& lu2 = lx.m_logused[n + 1];
    require(lu1.m_fileused >= lu2.m_fileused);
  }
}

struct LogDiff {
  bool m_tailmove; // true only if the tail moved in every redo part
};

static void
get_redodiff(const LogInfo& li1, const LogInfo& li2, LogDiff& ld)
{
  require(li1.m_nodes == li2.m_nodes);
  ld.m_tailmove = true;
  for (int i = 0; i < li1.m_nodes; i++)
  {
    LogNode& ln1 = li1.m_lognode[i];
    LogNode& ln2 = li2.m_lognode[i];
    for (int j = 0; j < 4; j++)
    {
      LogPart& lp1 = ln1.m_logpart[j];
      LogPart& lp2 = ln2.m_logpart[j];
      if (lp1.m_tail.m_pos == lp2.m_tail.m_pos)
      {
        ld.m_tailmove = false;
      }
    }
  }
}

static int
runRestartOK(NDBT_Context* ctx, NDBT_Step* step)
{
  int result = NDBT_OK;
  const int loops = ctx->getNumLoops();
  NdbRestarter restarter;

  info("restart01: start");
  int nodes = restarter.getNumDbNodes();
  require(nodes >= 1);
  info("restart01: nodes " << nodes);

  if (nodes == 1)
  {
    info("restart01: need at least 2 nodes");
    return result;
  }

  int nodeidx = myRandom48(nodes);
  int nodeid = restarter.getDbNodeId(nodeidx);
  info("restart01: using nodeid " << nodeid);

  LogInfo logInfo(nodes);

  int loop = 0;
  while (loop < loops && !ctx->isTestStopped())
  {
    info("restart01: loop " << loop);
    CHK1(get_nodestatus(restarter.handle, logInfo) == 0);

    bool fi = false;
    bool fn = false;
    bool fa = false;
    info("restart01: restart nodeid " << nodeid);
    CHK1(restarter.restartOneDbNode(nodeid, fi, fn, fa) == 0);
    CHK1(restarter.waitClusterStarted() == 0);
    info("restart01: cluster up again");

    // let write run until redo wraps (no check yet)
    sleep(300);
    loop++;
  }

  info("restart01: stop test");
  ctx->stopTest();
  return result;
}

#define g_SETFDS "SETFDS"

static int
run_write_ops(NDBT_Context* ctx, NDBT_Step* step, int cnt, int& upval, NdbError& err)
{
  int result = NDBT_OK;

  for (int i = 0; i < cnt && !ctx->isTestStopped(); i++)
  {
    CHK2(run_write_ops(ctx, step, upval++, err) == 0, err);
    if (err.code != 0)
    {
      require(err.code == 410);
      break;
    }
  }

  return result;
}

static int
get_newfds(const LogInfo& li)
{
  require(li.m_files >= 3);
  int newfds = li.m_files - 1;
  require(newfds >= li.m_minfds);
  // draw twice to prefer smaller values
  newfds = li.m_minfds + myRandom48(newfds - li.m_minfds + 1);
  newfds = li.m_minfds + myRandom48(newfds - li.m_minfds + 1);
  return newfds;
}

static int
get_limfds(const LogInfo& li, int newfds)
{
  int off = li.m_files - newfds;
  require(off > 0);
  off = myRandom48(off + 1);
  off = myRandom48(off + 1);
  return newfds + off;
}
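
/*
 * The repeated myRandom48() draws skew the distribution downward: the
 * second draw is uniform on a range whose upper bound is the first
 * draw, so small results become more likely than large ones.  As an
 * illustration, with minfds = 2 and 16 files a single draw picks the
 * maximum 15 with probability 1/14, while two draws reduce that to
 * 1/14 * 1/14.
 */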

static int
run_restart(NDBT_Context* ctx, NDBT_Step* step, int nodeid, bool fi)
{
  int result = NDBT_OK;
  int setfds = ctx->getProperty(g_SETFDS, (Uint32)0xff);
  require(setfds != 0xff);
  int dump[2] = { 2396, setfds };
  NdbRestarter restarter;
  info("run_restart: nodeid=" << nodeid << " initial=" << fi << " setfds=" << setfds);

  /*
   * When starting non-initial the node(s) already have some setfds,
   * but it is lost on restart.  We must dump the same setfds again.
   */
  do
  {
    bool fn = true;
    bool fa = false;
    if (nodeid == 0)
    {
      info("run_restart: restart all nostart");
      CHK1(restarter.restartAll(fi, fn, fa) == 0);
      info("run_restart: wait nostart");
      CHK1(restarter.waitClusterNoStart() == 0);
      info("run_restart: dump " << dump[0] << " " << dump[1]);
      CHK1(restarter.dumpStateAllNodes(dump, 2) == 0);
      info("run_restart: start all");
      CHK1(restarter.startAll() == 0);
    }
    else
    {
      info("run_restart: restart node nostart");
      CHK1(restarter.restartOneDbNode(nodeid, fi, fn, fa) == 0);
      info("run_restart: wait nostart");
      CHK1(restarter.waitNodesNoStart(&nodeid, 1) == 0);
      info("run_restart: dump " << dump[0] << " " << dump[1]);
      CHK1(restarter.dumpStateAllNodes(dump, 2) == 0);
      info("run_restart: start all");
      CHK1(restarter.startAll() == 0);
    }
    info("run_restart: wait started");
    CHK1(restarter.waitClusterStarted() == 0);
    info("run_restart: started");
  }
  while (0);

  info("run_restart: result=" << result);
  return result;
}

static int
run_start_lcp(NdbRestarter& restarter)
{
  int result = NDBT_OK;
  int dump[1] = { 7099 };
  do
  {
    CHK1(restarter.dumpStateAllNodes(dump, 1) == 0);
  }
  while (0);
  info("run_start_lcp: result=" << result);
  return result;
}
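
/*
 * DUMP 7099 is the same request that runLCP() below issues via
 * DumpStateOrd::DihStartLcpImmediately: it asks DIH to start a local
 * checkpoint right away, which is what eventually lets the redo tail
 * advance.
 */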

/*
 * Start long trans to freeze log tail. Run writes until over
 * FDs stored in zero-pages (may hit 410). Run restart (which
 * aborts long trans) and verify log tail moves (must not hit 410).
 * At start and every 5 loops do initial restart and DUMP to
 * change number of FDs stored to a random number between 2
 * (minimum) and number of redo log files minus 1.
 */

static int
runRestartFD(NDBT_Context* ctx, NDBT_Step* step)
{
  Ndb* pNdb = GETNDB(step);
  int result = NDBT_OK;
  const int loops = ctx->getNumLoops();
  const bool srflag = ctx->getProperty("SRFLAG", (Uint32)0);
  NdbRestarter restarter;

  info("restart: start srflag=" << srflag);
  int nodes = restarter.getNumDbNodes();
  require(nodes >= 1);
  info("restart: nodes " << nodes);

  if (nodes == 1 && !srflag)
  {
    info("restart: need at least 2 nodes");
    return result;
  }

  LogInfo logInfo(nodes);
  LogInfo logInfo2(nodes);
  LogMax logMax(nodes);
  LogDiff logDiff;

  const NdbDictionary::Table* pTab = 0;

  int upval = 0;
  int loop = 0;
  int newfds = 0;
  int limfds = 0;
  while (loop < loops && !ctx->isTestStopped())
  {
    info("restart: loop " << loop);
    if (loop % 5 == 0)
    {
      CHK1(get_nodestatus(restarter.handle, logInfo) == 0);
      CHK1(get_redostatus(restarter.handle, logInfo) == 0);

      // set new cmaxLogFilesInPageZero in all LQH nodes
      newfds = get_newfds(logInfo);
      ctx->setProperty(g_SETFDS, (Uint32)newfds);
      int nodeid = 0; // all nodes
      bool fi = true; // initial start
      CHK1(run_restart(ctx, step, nodeid, fi) == 0);

      CHK1(runCreate(ctx, step) == 0);
      pTab = g_tabptr[0];
      require(pTab != 0);
    }

    // start long trans
    HugoOperations ops(*pTab);
    ops.setQuiet();
    CHK2(ops.startTransaction(pNdb) == 0, ops.getNdbError());
    for (int i = 0; i < 100; i++)
    {
      CHK2(ops.pkInsertRecord(pNdb, i, 1, 0) == 0, ops.getNdbError());
    }
    CHK2(ops.execute_NoCommit(pNdb) == 0, ops.getNdbError());

    // randomize load1 limit a bit upwards
    limfds = get_limfds(logInfo, newfds);
    // may be up to logInfo.m_files and then hit 410
    require(newfds <= limfds && limfds <= logInfo.m_files);
    info("restart: newfds=" << newfds << " limfds=" << limfds);

    // start load1 loop
    info("restart: load1");
    while (!ctx->isTestStopped())
    {
      info("restart: load1 at " << upval);
      NdbError err;
      int cnt = 100 + myRandom48(100);
      CHK1(run_write_ops(ctx, step, cnt, upval, err) == 0);

      CHK1(get_redostatus(restarter.handle, logInfo) == 0);
      get_redoused(logInfo, logMax);
      info("restart: load1 max: " << logMax.m_logused[0]);
      info("restart: load1 min: " << logMax.m_logused[nodes - 1]);

      if (err.code != 0)
      {
        require(err.code == 410);
        info("restart: break load1 on 410");
        break;
      }

      int fileused = logMax.m_logused[0].m_fileused;
      if (fileused > limfds)
      {
        info("restart: break load1 on file usage > FDs");
        break;
      }
    }
    CHK1(result == NDBT_OK);

    // restart
    if (srflag)
    {
      int nodeid = 0;
      bool fi = false;
      CHK1(run_restart(ctx, step, nodeid, fi) == 0);
    }
    else
    {
      int nodeid = logMax.m_logused[0].m_nodeid;
      bool fi = false;
      CHK1(run_restart(ctx, step, nodeid, fi) == 0);
    }

    // start load2 loop
    info("restart: load2");
    CHK1(get_redostatus(restarter.handle, logInfo) == 0);
    logInfo.copyto(logInfo2);

    // should be fast but allow for slow machines
    int retry2 = 0;
    while (!ctx->isTestStopped())
    {
      info("restart: load2 at " << upval);
      NdbError err;
      int cnt = 100 + myRandom48(100);
      CHK1(run_write_ops(ctx, step, cnt, upval, err) == 0);

      CHK1(get_redostatus(restarter.handle, logInfo2) == 0);
      get_redoused(logInfo2, logMax);
      info("restart: load2 max: " << logMax.m_logused[0]);
      info("restart: load2 min: " << logMax.m_logused[nodes - 1]);

      require(err.code == 0 || err.code == 410);
      CHK2(retry2 < 60 || err.code == 0, err);

      get_redodiff(logInfo, logInfo2, logDiff);
      if (logDiff.m_tailmove)
      {
        info("restart: break load2");
        break;
      }

      info("restart: retry2=" << retry2);
      if (retry2 % 5 == 0)
      {
        CHK1(run_start_lcp(restarter) == 0);
        NdbSleep_MilliSleep(1000);
      }
      retry2++;
    }
    CHK1(result == NDBT_OK);

    sleep(1 + myRandom48(10));
    loop++;
  }

  info("restart: stop test");
  ctx->stopTest();
  return result;
}

static int
runResetFD(NDBT_Context* ctx, NDBT_Step* step)
{
  int result = NDBT_OK;
  int oldfds = ctx->getProperty(g_SETFDS, (Uint32)-1);
  do
  {
    if (oldfds == -1)
    {
      // never changed (some step failed)
      break;
    }
    ctx->setProperty(g_SETFDS, (Uint32)0);
    CHK1(run_restart(ctx, step, 0, true) == 0);
  }
  while (0);
  return result;
}

static int
resizeRedoLog(NDBT_Context* ctx, NDBT_Step* step)
{
  int result = NDBT_FAILED;
  Config conf;
  NdbRestarter restarter;
  Uint32 noOfLogFiles = ctx->getProperty("REDOLOGCOUNT", (Uint32)16);
  Uint32 logFileSize = ctx->getProperty("REDOLOGSIZE", (Uint32)16*1024*1024);
  Uint32 LCPinterval = ctx->getProperty("LCPINTERVAL", (Uint32)20);
  Uint32 defaultNoOfLogFiles = 0, defaultLogFileSize = 0;
  Uint32 defaultLCPinterval = 0;

  do
  {
    NdbMgmd mgmd;
    if (!mgmd.connect())
    {
      g_err << "Failed to connect to ndb_mgmd." << endl;
      break;
    }
    if (!mgmd.get_config(conf))
    {
      g_err << "Failed to get config from ndb_mgmd." << endl;
      break;
    }

    g_err << "Setting NoOfFragmentLogFiles = " << noOfLogFiles
          << " FragmentLogFileSize = " << logFileSize
          << " TimeBetweenLCP " << LCPinterval << endl;
    ConfigValues::Iterator iter(conf.m_configValues->m_config);
    for (int nodeid = 1; nodeid < MAX_NODES; nodeid++)
    {
      Uint32 oldValue;
      if (!iter.openSection(CFG_SECTION_NODE, nodeid))
        continue;
      if (iter.get(CFG_DB_NO_REDOLOG_FILES, &oldValue))
      {
        iter.set(CFG_DB_NO_REDOLOG_FILES, noOfLogFiles);
        if (defaultNoOfLogFiles == 0)
        {
          defaultNoOfLogFiles = oldValue;
        }
        else if (oldValue != defaultNoOfLogFiles)
        {
          g_err << "NoOfFragmentLogFiles is not consistent across nodes" << endl;
          break;
        }
      }
      if (iter.get(CFG_DB_REDOLOG_FILE_SIZE, &oldValue))
      {
        iter.set(CFG_DB_REDOLOG_FILE_SIZE, logFileSize);
        if (defaultLogFileSize == 0)
        {
          defaultLogFileSize = oldValue;
        }
        else if (oldValue != defaultLogFileSize)
        {
          g_err << "FragmentLogFileSize is not consistent across nodes" << endl;
          break;
        }
      }
      if (iter.get(CFG_DB_LCP_INTERVAL, &oldValue))
      {
        iter.set(CFG_DB_LCP_INTERVAL, LCPinterval);
        if (defaultLCPinterval == 0)
        {
          defaultLCPinterval = oldValue;
        }
        else if (oldValue != defaultLCPinterval)
        {
          g_err << "defaultLCPinterval is not consistent across nodes" << endl;
          break;
        }
      }
      iter.closeSection();
    }

    // Save old config values
    ctx->setProperty("REDOLOGCOUNT", (Uint32)defaultNoOfLogFiles);
    ctx->setProperty("REDOLOGSIZE", (Uint32)defaultLogFileSize);
    ctx->setProperty("LCPINTERVAL", (Uint32)defaultLCPinterval);

    if (!mgmd.set_config(conf))
    {
      g_err << "Failed to set config in ndb_mgmd." << endl;
      break;
    }

    g_err << "Restarting nodes to apply config change..." << endl;
    sleep(3); // Give MGM server time to restart
    if (restarter.restartAll(true))
    {
      g_err << "Failed to restart node." << endl;
      break;
    }
    if (restarter.waitClusterStarted(120) != 0)
    {
      g_err << "Failed waiting for node started." << endl;
      break;
    }
    g_err << "Nodes restarted with new config" << endl;
    result = NDBT_OK;
  } while (0);
  return result;
}

static int
start_open_transaction(NDBT_Context* ctx, NDBT_Step* step, HugoOperations** ops)
{
  /**
   * Ensure we don't use the same record for the open transaction as for
   * the ones filling up the REDO log.  In that case we would get into a
   * deadlock; we solve this by using a different table for the pending
   * transaction.
   */
  const NdbDictionary::Table* pTab = g_tabptr[0];

  g_info << "Starting a write and leaving it open so the pending "
         << "COMMIT indefinitely delays redo log trimming on table "
         << pTab->getName() << endl;

  *ops = new HugoOperations(*pTab);
  CHK3((*ops) != NULL, "Could not create new HugoOperations");

  (*ops)->setQuiet();

  Ndb* pNdb = GETNDB(step);
  CHK3((*ops)->startTransaction(pNdb) == 0,
       "Failed to start transaction: error ");
  int upval = 0;
  CHK3((*ops)->pkWriteRecord(pNdb, 0, 1, upval++) == 0,
       (*ops)->getNdbError());
  CHK3((*ops)->execute_NoCommit(pNdb) == 0,
       "Error: failed to execute NoCommit");

  return NDBT_OK;
}
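
/*
 * The open transaction above works because the redo tail of a log part
 * cannot advance past the oldest redo record of a still-open
 * transaction; the tests below rely on this to pin one log part while
 * load on another table fills it up.
 */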

static int
runWriteWithRedoFull(NDBT_Context* ctx, NDBT_Step* step)
{
  int upval = 0;
  NdbRestarter restarter;
  Ndb* pNdb = GETNDB(step);

  // Block redo logpart being trimmed by holding a transaction open
  HugoOperations* ops = NULL;
  start_open_transaction(ctx, step, &ops);

  g_err << "Starting PK insert load..." << endl;
  int loop = 0;
  int result = NDBT_FAILED;
  while (!ctx->isTestStopped())
  {
    if (loop % 100 == 0)
    {
      info("write: loop " << loop);
    }

    NdbError err;
    run_write_ops(ctx, step, upval++, err, true);
    if (err.code == 410)
    {
      g_err << "Redo log full, new requests aborted as expected" << endl;
      result = NDBT_OK;
      break;
    }
    else if (err.code == 266)
    {
      g_err << "Error: redo log full, but new requests still allowed to queue" << endl;
      break;
    }
    else if (err.code != 0)
    {
      g_err << "Error: write failed with unexpected error " << err.code << endl;
      break;
    }
    loop++;
  }

  g_err << "Executing pending COMMIT so that redo log can be trimmed..." << endl;
  int ret = ops->execute_Commit(pNdb);
  if (ret != 0)
  {
    g_err << "Error: failed to execute commit: " << ops->getNdbError() << endl;
    result = NDBT_FAILED;
  }
  ops->closeTransaction(pNdb);
  return result;
}

/**
 * Run one or more LCP requests when signalled until stop is signalled
 */
int
runLCP(NDBT_Context* ctx, NDBT_Step* step)
{
  while ((ctx->getProperty("stop_lcp", (Uint32)0)) == 0 &&
         !ctx->isTestStopped())
  {
    NdbSleep_MilliSleep(1000);
    // Check whether start lcp is signalled
    Uint32 lcps = 0;
    if ((lcps = ctx->getProperty("start_lcp", (Uint32)0)) == 0)
      continue;

    // Perform LCP the number of times indicated by 'lcps'
    ctx->setProperty("lcps_done", (Uint32)0);
    NdbRestarter restarter;
    int dump[] = { DumpStateOrd::DihStartLcpImmediately };
    restarter.getNumDbNodes();

    int filter[] = { 15, NDB_MGM_EVENT_CATEGORY_CHECKPOINT, 0 };
    NdbLogEventHandle handle =
      ndb_mgm_create_logevent_handle(restarter.handle, filter);

    struct ndb_logevent event;

    for (Uint32 i = 0; i < lcps; i++)
    {
      CHK3(restarter.dumpStateAllNodes(dump, 1) == 0, "Could not start LCP");
      while (ndb_logevent_get_next(handle, &event, 0) >= 0 &&
             event.type != NDB_LE_LocalCheckpointStarted);
      while (ndb_logevent_get_next(handle, &event, 0) >= 0 &&
             event.type != NDB_LE_LocalCheckpointCompleted);
    }

    // Signal lcps done
    ctx->setProperty("lcps_done", (Uint32)1);
    ctx->setProperty("start_lcp", (Uint32)0);
  }

  return NDBT_OK;
}

/**
 * If the given nodeid and logpart_with_maxusage are invalid, return the
 * maximum REDO log usage together with the node id and logpart that
 * have it; this fails if two distinct logparts (other than a
 * primary/backup pair) show the same usage.  Otherwise return the REDO
 * log usage of the given nodeid and logpart.
 */
static int
get_redo_logpart_maxusage(NDBT_Context* ctx, Uint32 &nodeid,
                          Uint32 &logpart_with_maxusage)
{
  NdbInfo ndbinfo(&ctx->m_cluster_connection, "ndbinfo/");
  if (!ndbinfo.init())
  {
    g_err << "ndbinfo.init failed" << endl;
    return -1;
  }

  const NdbInfo::Table* table;
  if (ndbinfo.openTable("ndbinfo/logspaces", &table) != 0)
  {
    g_err << "Failed to openTable(logspaces)" << endl;
    return -1;
  }

  NdbInfoScanOperation* scanOp = NULL;
  if (ndbinfo.createScanOperation(table, &scanOp))
  {
    g_err << "No NdbInfoScanOperation" << endl;
    return -1;
  }

  if (scanOp->readTuples() != 0)
  {
    g_err << "scanOp->readTuples failed" << endl;
    return -1;
  }

  const NdbInfoRecAttr* nodeid_colval = scanOp->getValue("node_id");
  const NdbInfoRecAttr* logtype_colval = scanOp->getValue("log_type");
  const NdbInfoRecAttr* logpart_colval = scanOp->getValue("log_part");
  const NdbInfoRecAttr* total_colval = scanOp->getValue("total");
  const NdbInfoRecAttr* used_colval = scanOp->getValue("used");

  if (scanOp->execute() != 0)
  {
    g_err << "scanOp->execute failed" << endl;
    return -1;
  }

  // Help variables to trace the max usage and the log part/node id having it
  int max_usage = -1, usage = -1;
  Uint32 max_logpart = UINT32_MAX;
  Uint32 max_node_id = 0;

  while (scanOp->nextResult() == 1)
  {
    Uint32 node_id = nodeid_colval->u_32_value();
    Uint64 total = total_colval->u_64_value();
    Uint64 used = used_colval->u_64_value();
    Uint32 logtype = logtype_colval->u_32_value();
    Uint32 logpart = logpart_colval->u_32_value();

    /* The result row can be skipped if
     * - it is NOT redo log data, or
     * - it is NOT the row the test has requested to retrieve.
     */
    if (logtype != 0 || // Not a redo log
        (nodeid != 0 && logpart_with_maxusage != UINT32_MAX &&
         nodeid != node_id && logpart_with_maxusage != logpart))
    {
      continue;
    }

    if (total != 0)
    {
      usage = (int)((100 * used) / total);

      g_info << "nodeid " << node_id << " " << nodeid
             << " logpart " << logpart << " " << logpart_with_maxusage
             << " usage " << usage << " " << max_usage << endl;

      // Requested row is found
      if (node_id == nodeid && logpart == logpart_with_maxusage)
      {
        g_err << "Row with requested nodeid " << nodeid << " and logpart " << logpart
              << " is found. Usage " << usage
              << " used = " << used << " total = " << total << endl;
        return usage;
      }

      /* The test blocks one logpart from being trimmed.
       * The following check may become true when an LCP races with the
       * load.  The probability is lower for runCheckLCPStartsAfterSR
       * than for runCheckLCPStartsAfterNR, since the latter calls this
       * method without LCPs performed.
       */
      if (usage > 0 && usage == max_usage &&
          max_logpart != logpart && max_node_id != node_id)
      {
        g_err << "Two non-peer log parts having same usage is not handled" << endl;
        return -1;
      }

      // Find the max usage and the corresponding nodeid/logpart.
      // Primary and backup logparts will be full. Return the
      // usage of the last row retrieved from ndbinfo/logspace.
      if (usage > max_usage)
      {
        max_usage = usage;
        max_logpart = logpart;
        max_node_id = node_id;
      }
    }
  }
  ndbinfo.releaseScanOperation(scanOp);
  ndbinfo.closeTable(table);

  // Return the results
  logpart_with_maxusage = max_logpart;
  nodeid = max_node_id;

  g_err << "get_redo_logpart_maxusage returns: nodeid " << nodeid
        << " lp " << logpart_with_maxusage
        << " usage " << max_usage << endl;

  if (max_usage <= 0)
    g_err << "The test could not fill the redo log. Redo log usage: usage " << usage
          << " max usage " << max_usage << endl;

  return max_usage;
}

static int
redologpart_is_trimmed(NDBT_Context* ctx, int usage_before,
                       Uint32 full_logpart, Uint32 nodeid)
{
  // Check whether the redo log is trimmed after system or node restart.
  // Wait max 2/3 of max LCP_INTERVAL (20) seconds for an lcp to
  // trim the logpart that was full. Slow machines may need more time.
  int retries = 20;
  int usage_after = -1;
  do
  {
    NdbSleep_MilliSleep(1000);
    usage_after = get_redo_logpart_maxusage(ctx, nodeid, full_logpart);
    CHK3(usage_after != -1, "Could not retrieve redo log usage");
    g_info << "Retrying : Usage before : " << usage_before
           << " Usage after : " << usage_after
           << " Retries " << 20 - retries << endl;
  } //while (retries-- > 0 && usage_after >= usage_before);
  while (retries-- > 0 && usage_after > 0);

  if (usage_after > 0)
  {
    g_err << "Redo log is not trimmed " << 20 - retries
          << " seconds after restart. "
          << " Usage before : " << usage_before
          << " Usage after : " << usage_after
          << " logpart " << full_logpart
          << " nodeid " << nodeid << endl;
    return NDBT_FAILED;
  }
  return NDBT_OK;
}

/**
 * Test whether an LCP is started after an SR and some space
 * from an almost-filled redo log part is released:
 * - Change the config to disable change-based LCP start
 *   by setting TimeBetweenLocalCheckpoints = 31 (max, default 20).
 * - Start a write and do not commit it, in order to prevent the
 *   corresponding redo logpart from being trimmed.  Other log parts
 *   are free to be trimmed as normal.
 * - Fill until some redo logpart, most probably the one containing
 *   the redo log for the above open transaction, becomes almost full.
 * - Perform 3 LCPs while loading further writes.
 * - Get the <nodeid, logpart> pair that has max usage.
 *   The assumption here is that only one logpart (primary and backup)
 *   will have the maximum and the others are trimmed by LCPs.  If more
 *   than one logpart has the same usage, the test will fail.
 * - Start an SR.
 * - After completing the SR, check the redo log usage of
 *   the logpart that went full.
 * - If the usage is not reduced within 2/3 of the LCP_INTERVAL seconds
 *   (approximately 20 seconds), the test will fail.
 */
static int
runCheckLCPStartsAfterSR(NDBT_Context* ctx, NDBT_Step* step)
{
  // Block redo logpart being trimmed by holding a transaction open
  HugoOperations* ops = NULL;
  start_open_transaction(ctx, step, &ops);

  g_info << "Starting normal load and fill some logpart" << endl;

  bool lcp_started = false;
  int upval = 0;
  while (ctx->getProperty("lcps_done", (Uint32)0) != 1 &&
         !ctx->isTestStopped())
  {
    NdbError err;
    run_write_ops(ctx, step, upval++, err, true);

    // When some logpart is getting full, continue with the load
    // in order to fill more of it (to its maximum)
    // while performing 3 LCPs
    if (err.code == 410 && !lcp_started)
    {
      lcp_started = true;
      ctx->setProperty("start_lcp", (Uint32)3);
      g_info << "Starting lcp" << endl;
    }
  }

  if (ctx->isTestStopped())
    return NDBT_FAILED;

  // Perform one more checkpoint
  ctx->setProperty("start_lcp", (Uint32)1);

  ctx->setProperty("stop_lcp", (Uint32)1); // stop runLCP()

  // Find the max redo log usage and the corresponding logpart and nodeid
  int usage_before_SR = -1;
  Uint32 full_logpart = UINT32_MAX;
  Uint32 nodeid = 0;

  usage_before_SR = get_redo_logpart_maxusage(ctx, nodeid, full_logpart);
  CHK3(full_logpart != UINT32_MAX, "No logpart became full");
  CHK3(nodeid != 0, "No nodeid found with almost full logpart");
  CHK3(usage_before_SR > 0, "Redo log usage <= 0");

  NdbRestarter restarter;
  // Perform a system restart
  CHK3(restarter.restartAll(false, true, true) == 0,
       "Restarting all nodes failed");
  g_err << "Wait until all nodes are stopped" << endl;
  CHK3(restarter.waitClusterNoStart() == 0,
       "Nodes have not reached NoStart state");
  g_err << "Starting all nodes" << endl;
  CHK3(restarter.startAll() == 0,
       "Starting all nodes failed");
  CHK3(restarter.waitClusterStarted() == 0, "Cluster has not started");

  // Check whether the full redo log part has been trimmed
  CHK3((redologpart_is_trimmed(ctx, usage_before_SR, full_logpart, nodeid)) == NDBT_OK,
       "Check for redolog trimmed failed");
  return NDBT_OK;
}

/**
 * Test whether an LCP is started after an NR and some space
 * from an almost-filled redo log part is released:
 * - Change the config to disable change-based LCP start
 *   by setting TimeBetweenLocalCheckpoints = 31 (max, default 20).
 * - Start a write and do not commit it, in order to prevent the
 *   corresponding redo logpart from being trimmed.  Other log parts
 *   are free to be trimmed as normal.
 * - Perform 1 LCP.
 * - Generate more writes and perform an LCP.
 * - Fill until some redo logpart, most probably the one containing
 *   the redo log for the above open transaction, becomes almost full.
 *   The assumption here is that only one logpart (primary and backup)
 *   will have the maximum and the others are trimmed by LCPs.  If more
 *   than one logpart has the same usage, the test will fail.
 * - Find the logpart usage, node id and log part that went full.
 * - Restart the found node id.  After completing the NR,
 *   check the usage of that nodeid/logpart.
 * - If the usage is not reduced within 2/3 of the LCP_INTERVAL seconds
 *   (approximately 20 seconds), the test will fail.
 */
static int
runCheckLCPStartsAfterNR(NDBT_Context* ctx, NDBT_Step* step)
{
  // Block redo logpart being trimmed by holding a transaction open
  HugoOperations* ops = NULL;
  start_open_transaction(ctx, step, &ops);

  // Perform 1 LCP
  ctx->setProperty("start_lcp", (Uint32)1);
  while (ctx->getProperty("lcps_done", (Uint32)0) != 1)
    NdbSleep_MilliSleep(1000);
  ctx->setProperty("lcps_done", (Uint32)0);

  // Perform some writes
  NdbError err;
  int upval = 0;
  run_write_ops(ctx, step, upval++, err, true);

  // Perform 1 LCP
  ctx->setProperty("start_lcp", (Uint32)1);
  while (ctx->getProperty("lcps_done", (Uint32)0) != 1)
    NdbSleep_MilliSleep(1000);

  // When the redo log starts to get full (err code 410),
  // fill more (100 run_write_ops = 100k pkWrite ops) to force
  // the logpart to get filled to its max

  // Find the redo logpart usage and node id of the logpart that went full
  int retries = -1;

  while (!ctx->isTestStopped())
  {
    run_write_ops(ctx, step, upval++, err, true);

    if (err.code == 410 && retries == -1)
    {
      retries = 100;
    }
    if (retries > 0 && retries-- == 1)
      break;

    // Continue load until lcps are finished
  }

  if (ctx->isTestStopped())
    return NDBT_FAILED;

  // Find the redo logpart usage and node id of the logpart that went full
  int usage_before = -1;
  Uint32 nodeid = 0; // The node with full redo logpart
  Uint32 full_logpart = UINT32_MAX;
  usage_before = get_redo_logpart_maxusage(ctx, nodeid, full_logpart);
  CHK3(full_logpart != UINT32_MAX, "No logpart became full");
  CHK3(nodeid != 0, "No nodeid found with almost full logpart");
  CHK3(usage_before > 0, "Redo log usage <= 0");

  // The node with full redo logpart. Same as nodeid but of type 'int'.
  int victim = (int)nodeid;

  g_info << "Stopping node " << victim << endl;
  NdbRestarter restarter;
  CHK3(restarter.restartOneDbNode(victim,
                                  /** initial */ false,
                                  /** nostart */ true,
                                  /** abort */ true) == 0,
       "Restart a node failed");
  CHK3(restarter.waitNodesNoStart(&victim, 1) == 0,
       "Started node has not reached NoStart state");

  // World is moving on with more load and lcps while the victim is away
  bool lcp_started = false;
  while (ctx->getProperty("lcps_done", (Uint32)0) != 1 &&
         !ctx->isTestStopped())
  {
    NdbError err;
    run_write_ops(ctx, step, upval++, err);

    if (!lcp_started)
    {
      lcp_started = true;
      ctx->setProperty("start_lcp", (Uint32)4);
      g_info << "Starting lcp" << endl;
    }
    // Continue load until lcps are finished
  }

  if (ctx->isTestStopped())
    return NDBT_FAILED;

  ctx->setProperty("stop_lcp", (Uint32)1); // stop runLCP()

  g_err << "Restarting the stopped node " << victim << endl;
  CHK3(restarter.startNodes(&victim, 1) == 0, "Start node failed");
  CHK3(restarter.waitNodesStarted(&victim, 1) == 0, "Node not started");

  // Check whether the full redo log part has been trimmed
  CHK3((redologpart_is_trimmed(ctx, usage_before, full_logpart, nodeid)) == NDBT_OK,
       "Check for redolog trimmed failed");
  return NDBT_OK;
}

/**
 * Test whether a delay in opening a redo file is handled gracefully.
 * Fill the redo log.  Delay opening a redo log file in order to
 * simulate a tardy disk, with the help of error insertion (5090).
 * Empty the redo log, then execute some more transactions.
 *
 * The test fills some redo log part up to almost full (error 410).
 * The error insertion is on the first redo log file with fileNo > 3 and
 * the default NoOfFragmentLogFiles = 16.  So the test assumes with
 * confidence that the error insertion must have occurred and the victim
 * redo log file is opened after the delay, before the redo log part
 * becomes full.
 *
 * The test will fail if transactions get errors other than the following:
 * 410 - REDO log files overloaded, 266 - Time-out in NDB,
 * 1220 - REDO log files overloaded (increase FragmentLogFileSize).
 * The latter will be the direct consequence of the error injection
 * when the test is run with DefaultOperationRedoProblemAction=abort.
 * However the test is run with the default value "queue".
 * The test passes once the redo log is trimmed and some more
 * transactions succeed.
 */
static int
runCheckOpenNextRedoLogFile(NDBT_Context* ctx, NDBT_Step* step)
{
  // Block redo logpart being trimmed by holding a transaction open
  HugoOperations* ops = NULL;
  start_open_transaction(ctx, step, &ops);

  NdbRestarter restarter;
  int node = restarter.getNode(NdbRestarter::NS_RANDOM);
  g_err << "Inserting error in node " << node << endl;
  CHK3(restarter.insertErrorInNode(node, 5090) == 0, "Error insertion failed");

  // Run transactions until some redo log part gets full.
  // Commit the open transaction to trim the redo log.
  int retries = -1;
  int success_after_err = 0;
  bool committed = false;
  NdbError err;
  int upval = 0;
  g_err << "Filling redo logs" << endl;

  while (!ctx->isTestStopped())
  {
    run_write_ops(ctx, step, upval++, err, true);

    if (err.code == 410)
    {
      if (retries == -1)
        retries = 100;

      // Find the logpart that became almost full
      int usage_before = -1;
      Uint32 full_logpart = UINT32_MAX;
      Uint32 nodeid = 0;

      usage_before = get_redo_logpart_maxusage(ctx, nodeid, full_logpart);
      CHK3(usage_before > 0, "Redo log usage <= 0");
      CHK3(nodeid != 0, "No nodeid found with almost full logpart");
      CHK3(full_logpart != UINT32_MAX, "No logpart became full");

      if (!committed)
      {
        // Commit the open transaction to trim the redo log part.
        CHK3(ops->execute_Commit(GETNDB(step)) == 0,
             "Error: failed to commit the open transaction.");
      }
      committed = true;
      g_err << "Check whether the redo log is trimmed" << endl;
      CHK3(((redologpart_is_trimmed(ctx, usage_before, full_logpart,
                                    nodeid)) == NDBT_OK),
           "Check for redolog trimmed failed");

      // Start counting the succeeded transactions after the log part trim
      success_after_err = 0;
    }
    else if (err.code == 266 || err.code == 1220)
    {
      NdbSleep_MilliSleep(100);
      // Continue with new transactions
    }
    else if (err.code > 0)
    {
      g_err << "Transaction aborted with err " << err.code
            << " " << err.message << endl;
      break;
    }
    else
    {
      // err.code = 0 (no errors)
      if (retries > 0)
      {
        if (success_after_err++ > 50)
        {
          // Some more transactions are executed to confirm that
          // the inserted error scenario is alleviated.
          return NDBT_OK;
        }
        if (retries-- == 1)
        {
          g_err << "Transactions completed after redo log is trimmed are : "
                << success_after_err
                << ", Intended to complete > 50"
                << endl;
          break;
        }
      }
    }
  }
  return NDBT_FAILED;
}

NDBT_TESTSUITE(testRedo);
TESTCASE("WriteOK",
         "Run only write to verify REDO size is adequate"){
  TC_PROPERTY("TABMASK", (Uint32)(2));
  INITIALIZER(runCreate);
  STEP(runWriteOK);
  FINALIZER(runDrop);
}
TESTCASE("Bug36500",
         "Long trans and recovery from 410"){
  TC_PROPERTY("TABMASK", (Uint32)(1|2));
  INITIALIZER(runCreate);
  STEP(runLongtrans);
  STEP(runWrite410);
  FINALIZER(runDrop);
}
TESTCASE("Latency410",
         "Transaction latency under 410"){
  TC_PROPERTY("TABMASK", (Uint32)(1|2|4));
  TC_PROPERTY("SLEEP410", (Uint32)60);
  INITIALIZER(runCreate);
  STEP(runLongtrans);
  STEP(runWrite410);
  STEP(runLatency);
  FINALIZER(runDrop);
}
TESTCASE("RestartOK",
         "Node restart"){
  TC_PROPERTY("TABMASK", (Uint32)(2));
  INITIALIZER(runCreate);
  STEP(runWriteOK);
  STEP(runRestartOK);
  FINALIZER(runDrop);
}
TESTCASE("RestartFD",
         "Long trans and node restart with few LQH FDs"){
  TC_PROPERTY("TABMASK", (Uint32)(1|2));
  STEP(runRestartFD);
  FINALIZER(runDrop);
  FINALIZER(runResetFD);
}
TESTCASE("RestartFDSR",
         "RestartFD using system restart"){
  TC_PROPERTY("TABMASK", (Uint32)(1|2));
  TC_PROPERTY("SRFLAG", (Uint32)1);
  STEP(runRestartFD);
  FINALIZER(runDrop);
  FINALIZER(runResetFD);
}
TESTCASE("RedoFull",
         "Fill redo logs, apply load and check queuing aborted"){
  TC_PROPERTY("TABMASK", (Uint32)(3));
  TC_PROPERTY("REDOLOGCOUNT", (Uint32)(3));
  TC_PROPERTY("REDOLOGSIZE", (Uint32)(4*1024*1024));
  INITIALIZER(resizeRedoLog);
  INITIALIZER(runCreate);
  STEP(runWriteWithRedoFull);
  FINALIZER(runDrop);
  FINALIZER(resizeRedoLog);
}
TESTCASE("CheckLCPStartsAfterSR",
         "Fill redo logs to full, SR, and see if LCP starts"){
  TC_PROPERTY("TABMASK", (Uint32)(3));
  TC_PROPERTY("LCPINTERVAL", (Uint32)(31));
  INITIALIZER(resizeRedoLog);
  INITIALIZER(runCreate);
  STEP(runCheckLCPStartsAfterSR);
  STEP(runLCP);
  FINALIZER(runDrop);
  FINALIZER(resizeRedoLog);
}
TESTCASE("CheckLCPStartsAfterNR",
         "Fill redo logs to full, restart the node having full redo, "
         "and see if LCP starts"){
  TC_PROPERTY("TABMASK", (Uint32)(3));
  TC_PROPERTY("LCPINTERVAL", (Uint32)(31));
  TC_PROPERTY("NR", (Uint32)(1));
  INITIALIZER(resizeRedoLog);
  INITIALIZER(runCreate);
  STEP(runCheckLCPStartsAfterNR);
  STEP(runLCP);
  FINALIZER(runDrop);
  FINALIZER(resizeRedoLog);
}
TESTCASE("CheckNextRedoFileOpened",
         "Fill redo logs to full, check if next file is open "
         "in a stressed disk situation"){
  TC_PROPERTY("TABMASK", (Uint32)(3));
  INITIALIZER(resizeRedoLog);
  INITIALIZER(runCreate);
  STEP(runCheckOpenNextRedoLogFile);
  FINALIZER(runDrop);
  FINALIZER(resizeRedoLog);
}
NDBT_TESTSUITE_END(testRedo)

int
main(int argc, const char** argv)
{
  ndb_init();
  NDBT_TESTSUITE_INSTANCE(testRedo);
  testRedo.setCreateTable(false);
  myRandom48Init((long)NdbTick_CurrentMillisecond());
  g_msgmutex = NdbMutex_Create();
  require(g_msgmutex != 0);
  int ret = testRedo.execute(argc, argv);
  NdbMutex_Destroy(g_msgmutex);
  return ret;
}