23 #include <signaldata/EventReport.hpp>
24 #include <signaldata/StartOrd.hpp>
25 #include <signaldata/CloseComReqConf.hpp>
26 #include <signaldata/PrepFailReqRef.hpp>
27 #include <signaldata/NodeFailRep.hpp>
28 #include <signaldata/ReadNodesConf.hpp>
29 #include <signaldata/NFCompleteRep.hpp>
30 #include <signaldata/CheckNodeGroups.hpp>
31 #include <signaldata/ArbitSignalData.hpp>
32 #include <signaldata/ApiRegSignalData.hpp>
33 #include <signaldata/ApiVersion.hpp>
34 #include <signaldata/BlockCommitOrd.hpp>
35 #include <signaldata/FailRep.hpp>
36 #include <signaldata/DisconnectRep.hpp>
37 #include <signaldata/ApiBroadcast.hpp>
38 #include <signaldata/Upgrade.hpp>
39 #include <signaldata/EnableCom.hpp>
40 #include <signaldata/RouteOrd.hpp>
41 #include <signaldata/NodePing.hpp>
42 #include <signaldata/DihRestart.hpp>
43 #include <ndb_version.h>
45 #include <EventLogger.hpp>
49 #ifdef DEBUG_QMGR_START
50 #include <DebuggerNames.hpp>
51 #define DEBUG(x) ndbout << "QMGR " << __LINE__ << ": " << x << endl
52 #define DEBUG_START(gsn, node, msg) DEBUG(getSignalName(gsn) << " to: " << node << " - " << msg)
53 #define DEBUG_START2(gsn, rg, msg) { char nodes[255]; DEBUG(getSignalName(gsn) << " to: " << rg.m_nodes.getText(nodes) << " - " << msg); }
54 #define DEBUG_START3(signal, msg) DEBUG(getSignalName(signal->header.theVerId_signalNumber) << " from " << refToNode(signal->getSendersBlockRef()) << " - " << msg);
57 #define DEBUG_START(gsn, node, msg)
58 #define DEBUG_START2(gsn, rg, msg)
59 #define DEBUG_START3(signal, msg)
// CM_HEARTBEAT handler. Takes the node id from signal->theData[0] and
// resolves it in nodeRec via ptrCheckGuard (bound: MAX_NDB_NODES).
// NOTE(review): the remainder of the body is not visible in this excerpt.
94 void Qmgr::execCM_HEARTBEAT(
Signal* signal)
98 hbNodePtr.i = signal->theData[0];
99 ptrCheckGuard(hbNodePtr, MAX_NDB_NODES, nodeRec);
// CM_NODEINFOREF handler. The only visible action is a call to
// systemErrorLab with the current source line.
107 void Qmgr::execCM_NODEINFOREF(
Signal* signal)
110 systemErrorLab(signal, __LINE__);
// CONTINUEB handler. signal->theData[0] selects the continuation type;
// theData[1] and theData[2] are read unconditionally as tdata0/tdata1,
// although only the two REGREQ timelimit cases use them in the visible code.
// NOTE(review): break/return statements and braces are not visible in this
// excerpt, so the exact fall-through behaviour cannot be confirmed from here.
117 void Qmgr::execCONTINUEB(
Signal* signal)
120 const Uint32 tcontinuebType = signal->theData[0];
121 const Uint32 tdata0 = signal->theData[1];
122 const Uint32 tdata1 = signal->theData[2];
123 switch (tcontinuebType) {
// Registration time limit: (tdata0, tdata1) are compared with the current
// (m_startKey, m_startNode) before regreqTimeLimitLab is called.
124 case ZREGREQ_TIMELIMIT:
126 if (c_start.m_startKey != tdata0 || c_start.m_startNode != tdata1) {
130 regreqTimeLimitLab(signal);
// NOTE(review): this case compares tdata1 (theData[2]) with m_startNode, but
// the ZREGREQ_MASTER_TIMELIMIT sender visible in execCM_REGREQ fills only
// theData[0..1] and sends length 2 - confirm what theData[2] holds here.
132 case ZREGREQ_MASTER_TIMELIMIT:
134 if (c_start.m_startKey != tdata0 || c_start.m_startNode != tdata1) {
139 failReportLab(signal, c_start.m_startNode, FailRep::ZSTART_IN_REGREQ, getOwnNodeId());
142 case ZTIMER_HANDLING:
144 timerHandlingLab(signal);
147 case ZARBIT_HANDLING:
149 runArbitThread(signal);
// Start failure limit: compares the current time with
// c_start_election_time + c_restartFailureTimeout and builds a shutdown
// message when exceeded; otherwise (lines 173-174) the same CONTINUEB is
// re-sent to this block with a delay argument of 3000.
152 case ZSTART_FAILURE_LIMIT:{
153 if (cpresident != ZNIL)
158 Uint64 now = NdbTick_CurrentMillisecond();
160 if (now > (c_start_election_time + c_restartFailureTimeout))
164 tmp.
append(
"Shutting down node as total restart time exceeds "
165 " StartFailureTimeout as set in config file ");
166 if(c_restartFailureTimeout == (Uint32) ~0)
167 tmp.
append(
" 0 (inifinite)");
169 tmp.
appfmt(
" %d", c_restartFailureTimeout);
173 signal->theData[0] = ZSTART_FAILURE_LIMIT;
174 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 3000, 1);
180 systemErrorLab(signal, __LINE__);
// DEBUG_SIG handler. Takes a node id from signal->theData[0] and resolves it
// in nodeRec via ptrCheckGuard (bound: MAX_NODES).
// NOTE(review): the remainder of the body is not visible in this excerpt.
188 void Qmgr::execDEBUG_SIG(
Signal* signal)
190 NodeRecPtr debugNodePtr;
192 debugNodePtr.i = signal->theData[0];
193 ptrCheckGuard(debugNodePtr, MAX_NODES, nodeRec);
// FAIL_REP handler. Extracts the failed node id, the failure cause and the
// reporting (source) node from the FailRep payload and passes them to
// failReportLab.
200 void Qmgr::execFAIL_REP(
Signal* signal)
203 const NodeId failNodeId = failRep->failNodeId;
204 const FailRep::FailCause failCause = (FailRep::FailCause)failRep->failCause;
// The source node id is derived from the payload using the signal length.
205 Uint32 failSource = failRep->getFailSourceNodeId(signal->
length());
// Fallback: use the node of the sending block as the source.
// NOTE(review): the condition guarding this assignment is not visible here.
209 failSource = refToNode(signal->getSendersBlockRef());
213 failReportLab(signal, failNodeId, failCause, failSource);
// PRES_TOREQ handler. Replies to the block reference given in theData[0] with
// a two-word PRES_TOCONF carrying this node's id and ccommitFailureNr.
220 void Qmgr::execPRES_TOREQ(
Signal* signal)
// Save the reply address before theData[0] is overwritten.
223 BlockReference Tblockref = signal->theData[0];
224 signal->theData[0] = getOwnNodeId();
225 signal->theData[1] = ccommitFailureNr;
226 sendSignal(Tblockref, GSN_PRES_TOCONF, signal, 2, JBA);
// READ_CONFIG_REQ handler. Remembers the requester's reference and senderData,
// obtains the own-node configuration iterator, and answers with
// READ_CONFIG_CONF echoing senderData.
// NOTE(review): how the configuration iterator is used is not visible here.
231 Qmgr::execREAD_CONFIG_REQ(
Signal* signal)
237 Uint32 ref = req->senderRef;
238 Uint32 senderData = req->senderData;
241 m_ctx.m_config.getOwnConfigIterator();
245 conf->senderRef = reference();
246 conf->senderData = senderData;
247 sendSignal(ref, GSN_READ_CONFIG_CONF, signal,
248 ReadConfigConf::SignalLength, JBB);
// START_ORD handler. Kicks off timer handling and initialises every node
// record in the range 1 .. MAX_NODES-1.
252 Qmgr::execSTART_ORD(
Signal* signal)
// Send CONTINUEB(ZTIMER_HANDLING) to QMGR; the 64-bit current time is split
// into high word (theData[1]) and low word (theData[2]).
257 Uint64 now = NdbTick_CurrentMillisecond();
258 signal->theData[0] = ZTIMER_HANDLING;
259 signal->theData[1] = Uint32(now >> 32);
260 signal->theData[2] = Uint32(now);
261 sendSignal(QMGR_REF, GSN_CONTINUEB, signal, 3, JBB);
264 for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++)
266 ptrAss(nodePtr, nodeRec);
267 nodePtr.p->ndynamicId = 0;
268 nodePtr.p->hbOrder = 0;
// The phase becomes ZINIT (node also added to c_definedNodes) or
// ZAPI_INACTIVE.
// NOTE(review): the conditions selecting between these are not visible here.
274 nodePtr.p->phase = ZINIT;
275 c_definedNodes.
set(nodePtr.i);
279 nodePtr.p->phase = ZAPI_INACTIVE;
286 nodePtr.p->phase = ZAPI_INACTIVE;
290 nodePtr.p->phase = ZAPI_INACTIVE;
// Reset per-node failure-handling state.
294 nodePtr.p->sendPrepFailReqStatus = Q_NOT_ACTIVE;
295 nodePtr.p->sendCommitFailReqStatus = Q_NOT_ACTIVE;
296 nodePtr.p->sendPresToStatus = Q_NOT_ACTIVE;
297 nodePtr.p->failState = NORMAL;
// STTOR handler. Dispatches on signal->theData[1] and finishes by calling
// sendSttorryLab.
// NOTE(review): the case labels of the outer switch are not visible in this
// excerpt, so which value triggers which action cannot be stated from here.
314 void Qmgr::execSTTOR(
Signal* signal)
318 switch(signal->theData[1]){
// Enables API checking; when this node is the president, the arbitration
// method is examined and handleArbitStart is called.
327 cactivateApiCheck = 1;
328 if (cpresident == getOwnNodeId())
330 switch(arbitRec.method){
331 case ArbitRec::DISABLED:
334 case ArbitRec::METHOD_EXTERNAL:
335 case ArbitRec::METHOD_DEFAULT:
341 handleArbitStart(signal);
// Allows API connections and moves every node record currently in
// ZAPI_INACTIVE to ZFAIL_CLOSING with failState NORMAL.
351 c_allow_api_connect = 1;
353 for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++)
360 ptrAss(nodePtr, nodeRec);
361 if (nodePtr.p->phase == ZAPI_INACTIVE)
365 nodePtr.p->phase = ZFAIL_CLOSING;
366 nodePtr.p->failState = NORMAL;
372 sendSttorryLab(signal);
// Sends a six-word STTORRY to NDBCNTR. Words 3..5 are set to 7, 8 and 255.
// NOTE(review): any assignments to theData[0..2] are not visible here.
376 void Qmgr::sendSttorryLab(
Signal* signal)
381 signal->theData[3] = 7;
382 signal->theData[4] = 8;
383 signal->theData[5] = 255;
384 sendSignal(NDBCNTR_REF, GSN_STTORRY, signal, 6, JBB);
// Marks this node's own record as ZSTARTING and sends DIH_RESTARTREQ to DBDIH
// with this block as senderRef. The replies are handled by
// execDIH_RESTARTREF / execDIH_RESTARTCONF.
388 void Qmgr::startphase1(
Signal* signal)
393 nodePtr.i = getOwnNodeId();
394 ptrAss(nodePtr, nodeRec);
395 nodePtr.p->phase = ZSTARTING;
398 req->senderRef = reference();
399 sendSignal(DBDIH_REF, GSN_DIH_RESTARTREQ, signal,
400 DihRestartReq::SignalLength, JBB);
// DIH_RESTARTREF handler. Records a latest GCI of 0, copies the
// no-nodegroup node mask from the signal, and continues by calling
// execCM_INFOCONF directly.
405 Qmgr::execDIH_RESTARTREF(
Signal*signal)
410 signal->getDataPtr());
411 c_start.m_latest_gci = 0;
412 c_start.m_no_nodegroup_nodes.
assign(NdbNodeBitmask::Size,
413 ref->no_nodegroup_mask);
414 execCM_INFOCONF(signal);
// DIH_RESTARTCONF handler. Records the latest GCI reported in the signal,
// copies the no-nodegroup node mask, and continues by calling
// execCM_INFOCONF directly.
418 Qmgr::execDIH_RESTARTCONF(
Signal*signal)
423 signal->getDataPtr());
425 c_start.m_latest_gci = conf->latest_gci;
426 c_start.m_no_nodegroup_nodes.
assign(NdbNodeBitmask::Size,
427 conf->no_nodegroup_mask);
428 execCM_INFOCONF(signal);
// Sets the delay of both hb_send_timer and hb_check_timer to aHbDelay,
// using 10 instead when aHbDelay is below 10, and resets both timers to the
// current time.
431 void Qmgr::setHbDelay(UintR aHbDelay)
433 NDB_TICKS now = NdbTick_CurrentMillisecond();
434 hb_send_timer.
setDelay(aHbDelay < 10 ? 10 : aHbDelay);
435 hb_send_timer.
reset(now);
436 hb_check_timer.
setDelay(aHbDelay < 10 ? 10 : aHbDelay);
437 hb_check_timer.
reset(now);
// Stores aHbApiDelay in chbApiDelay, using 100 instead when the argument is
// below 100, and resets hb_api_timer to the current time.
// NOTE(review): a setDelay call on hb_api_timer may exist on a line not
// visible in this excerpt.
440 void Qmgr::setHbApiDelay(UintR aHbApiDelay)
442 NDB_TICKS now = NdbTick_CurrentMillisecond();
443 chbApiDelay = (aHbApiDelay < 100 ? 100 : aHbApiDelay);
445 hb_api_timer.
reset(now);
// Stores aArbitTimeout in arbitRec.timeout, using 10 instead when the
// argument is below 10.
448 void Qmgr::setArbitTimeout(UintR aArbitTimeout)
450 arbitRec.timeout = (aArbitTimeout < 10 ? 10 : aArbitTimeout);
// Configures the connectivity check from aCCDelay. Two outcomes are visible:
// the check is disabled with a timer delay of 0, or it is enabled with a
// delay of aCCDelay (10 when aCCDelay is below 10) and the timer is reset to
// the current time.
// NOTE(review): the condition choosing between the two is not visible here.
453 void Qmgr::setCCDelay(UintR aCCDelay)
455 NDB_TICKS now = NdbTick_CurrentMillisecond();
459 m_connectivity_check.m_enabled =
false;
460 m_connectivity_check.m_timer.
setDelay(0);
464 m_connectivity_check.m_enabled =
true;
465 m_connectivity_check.m_timer.
setDelay(aCCDelay < 10 ? 10 : aCCDelay);
466 m_connectivity_check.m_timer.
reset(now);
// CONNECT_REP handler for the node id in signal->theData[0]. Marks the node
// as connected, clears its m_secret, and then acts according to this node's
// own phase and the start step recorded in c_start.m_gsn.
// NOTE(review): case labels, returns and braces are largely missing from this
// excerpt; the notes below describe only the visible statements.
470 void Qmgr::execCONNECT_REP(
Signal* signal)
473 const Uint32 nodeId = signal->theData[0];
// Error insert 931: log that the CONNECT_REP is being discarded.
475 if (ERROR_INSERTED(931))
478 ndbout_c(
"Discarding CONNECT_REP(%d)", nodeId);
479 infoEvent(
"Discarding CONNECT_REP(%d)", nodeId);
483 c_connectedNodes.
set(nodeId);
487 ptrCheckGuard(nodePtr, MAX_NODES, nodeRec);
488 nodePtr.p->m_secret = 0;
// From here on nodePtr refers to this node's own record.
490 nodePtr.i = getOwnNodeId();
491 ptrCheckGuard(nodePtr, MAX_NODES, nodeRec);
493 switch(nodePtr.p->phase){
497 ndbrequire(!c_clusterNodes.
get(nodeId));
522 switch(c_start.m_gsn){
525 sendCmRegReq(signal, nodeId);
531 ndbrequire(nodePtr.p->phase == ZSTARTING);
532 ndbrequire(c_start.m_nodes.isWaitingFor(nodeId));
// Waiting for node info: if the newly connected node is one we wait for,
// send it CM_NODEINFOREQ (requires that we are starting and not president).
534 case GSN_CM_NODEINFOREQ:
537 if (c_start.m_nodes.isWaitingFor(nodeId))
540 ndbrequire(getOwnNodeId() != cpresident);
541 ndbrequire(nodePtr.p->phase == ZSTARTING);
542 sendCmNodeInfoReq(signal, nodeId, nodePtr.p);
// A running, non-president node that was waiting for this node stops
// waiting, clears the start step and runs cmAddPrepare for it.
546 case GSN_CM_NODEINFOCONF:{
549 ndbrequire(getOwnNodeId() != cpresident);
550 ndbrequire(nodePtr.p->phase == ZRUNNING);
551 if (c_start.m_nodes.isWaitingFor(nodeId))
554 c_start.m_nodes.clearWaitingFor(nodeId);
555 c_start.m_gsn = RNIL;
557 NodeRecPtr addNodePtr;
558 addNodePtr.i = nodeId;
559 ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec);
560 cmAddPrepare(signal, addNodePtr, nodePtr.p);
568 ndbrequire(!c_start.m_nodes.isWaitingFor(nodeId));
// Ask the connected node's QMGR for its node view; the answer arrives as
// READ_NODESCONF / READ_NODESREF.
571 signal->theData[0] = reference();
572 sendSignal(calcQmgrBlockRef(nodeId), GSN_READ_NODESREQ, signal, 1, JBA);
// READ_NODESCONF handler. Forwards to check_readnodes_reply with the node id
// of the sending block.
// NOTE(review): the third argument is on a line not visible in this excerpt.
577 Qmgr::execREAD_NODESCONF(
Signal* signal)
580 check_readnodes_reply(signal,
581 refToNode(signal->getSendersBlockRef()),
// READ_NODESREF handler. Forwards to check_readnodes_reply with the node id
// of the sending block.
// NOTE(review): the third argument is on a line not visible in this excerpt.
586 Qmgr::execREAD_NODESREF(
Signal* signal)
589 check_readnodes_reply(signal,
590 refToNode(signal->getSendersBlockRef()),
// Starts the registration/election procedure. Also called directly from
// execDIH_RESTARTREF and execDIH_RESTARTCONF.
597 void Qmgr::execCM_INFOCONF(
Signal* signal)
// Three-word OPEN_COMREQ to CMVMI with the first two words zero.
// NOTE(review): the assignment of theData[2] is not visible here.
602 signal->theData[0] = 0;
603 signal->theData[1] = 0;
605 sendSignal(CMVMI_REF, GSN_OPEN_COMREQ, signal, 3, JBB);
// No president known to be alive yet; remember when the election began.
608 cpresidentAlive = ZFALSE;
609 c_start_election_time = NdbTick_CurrentMillisecond();
// Arm the ZSTART_FAILURE_LIMIT check handled in execCONTINUEB.
611 signal->theData[0] = ZSTART_FAILURE_LIMIT;
612 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 3000, 1);
614 cmInfoconf010Lab(signal);
// File-scope start-type value, initialised to 0. cmInfoconf010Lab copies it
// into c_start.m_start_type.
619 Uint32 g_start_type = 0;
// Begins (or restarts) a registration round: resets the c_start bookkeeping,
// sends CM_REGREQ to nodes in 1 .. MAX_NDB_NODES-1, and arms the
// ZREGREQ_TIMELIMIT continuation. Also re-entered from regreqTimeLimitLab.
622 void Qmgr::cmInfoconf010Lab(
Signal* signal)
624 c_start.m_startKey = 0;
625 c_start.m_startNode = getOwnNodeId();
626 c_start.m_nodes.clearWaitingFor();
627 c_start.m_gsn = GSN_CM_REGREQ;
628 c_start.m_starting_nodes.
clear();
629 c_start.m_starting_nodes_w_log.
clear();
630 c_start.m_regReqReqSent = 0;
631 c_start.m_regReqReqRecv = 0;
// Skip set = g_nowait_nodes restricted to nodes that are actually defined.
632 c_start.m_skip_nodes = g_nowait_nodes;
633 c_start.m_skip_nodes.
bitAND(c_definedNodes);
634 c_start.m_start_type = g_start_type;
638 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
640 ptrAss(nodePtr, nodeRec);
// NOTE(review): the statement controlled by this test (presumably skipping
// unconnected nodes) is not visible in this excerpt.
648 if(!c_connectedNodes.
get(nodePtr.i))
651 sendCmRegReq(signal, nodePtr.i);
// The (startKey, startNode) pair lets execCONTINUEB recognise a stale timer.
660 signal->theData[0] = ZREGREQ_TIMELIMIT;
661 signal->theData[1] = c_start.m_startKey;
662 signal->theData[2] = c_start.m_startNode;
663 sendSignalWithDelay(QMGR_REF, GSN_CONTINUEB, signal, 3000, 3);
665 creadyDistCom = ZTRUE;
// Sends CM_REGREQ to the QMGR block of nodeId. The request carries this
// block's reference, own node id, NDB and MySQL versions, the latest GCI, the
// start type and the current skip-node mask. Increments
// c_start.m_regReqReqSent for every request sent.
670 Qmgr::sendCmRegReq(
Signal * signal, Uint32 nodeId){
672 req->blockRef = reference();
673 req->nodeId = getOwnNodeId();
674 req->version = NDB_VERSION;
675 req->mysql_version = NDB_MYSQL_VERSION_D;
676 req->latest_gci = c_start.m_latest_gci;
677 req->start_type = c_start.m_start_type;
678 c_start.m_skip_nodes.
copyto(NdbNodeBitmask::Size, req->skip_nodes);
679 const Uint32 ref = calcQmgrBlockRef(nodeId);
680 sendSignal(ref, GSN_CM_REGREQ, signal, CmRegReq::SignalLength, JBB);
681 DEBUG_START(GSN_CM_REGREQ, nodeId,
"");
683 c_start.m_regReqReqSent++;
// Compares the start-type bitmask of a starting node with our own. The
// visible test is true when `starting` is exactly the ST_INITIAL_START bit
// while `own` does not include that bit.
// NOTE(review): the return statements are not visible here; execCM_REGREQ
// refuses with ZINCOMPATIBLE_START_TYPE when this function returns true.
725 check_start_type(Uint32 starting, Uint32 own)
727 if (starting == (1 << NodeState::ST_INITIAL_START) &&
728 ((own & (1 << NodeState::ST_INITIAL_START)) == 0))
// CM_REGREQ handler: a node asks to register with the cluster.
// Visible refusal paths (each answered through sendCmRegrefLab):
//   ZINCOMPATIBLE_VERSION, ZINCOMPATIBLE_START_TYPE, ZELECTION,
//   ZNOT_PRESIDENT, ZBUSY_PRESIDENT, ZBUSY_TO_PRES, ZNOT_IN_CFG,
//   ZSINGLE_USER_MODE, ZNOT_DEAD.
// On acceptance the node gets a dynamic id, CM_REGCONF is returned to it,
// CM_ADD(Prepare) is sent to `rg`, and a master time limit is armed.
// NOTE(review): many guards, returns and declarations (gci, skip_nodes, rg,
// cmRegReq, cmRegConf) are on lines not visible in this excerpt.
735 void Qmgr::execCM_REGREQ(
Signal* signal)
737 DEBUG_START3(signal,
"");
739 NodeRecPtr addNodePtr;
743 const BlockReference Tblockref = cmRegReq->blockRef;
744 const Uint32 startingVersion = cmRegReq->version;
745 Uint32 startingMysqlVersion = cmRegReq->mysql_version;
746 addNodePtr.i = cmRegReq->nodeId;
748 Uint32 start_type = ~0;
// Request from a node we do not yet consider connected: log and require
// that this is a multithreaded (isNdbMt) build.
751 if (!c_connectedNodes.
get(cmRegReq->nodeId))
763 g_eventLogger->
info(
"discarding CM_REGREQ from %u "
764 "as we're not yet connected (isNdbMt: %u)",
766 (
unsigned)isNdbMt());
768 ndbrequire(isNdbMt());
// The GCI, start type and skip mask are read only from full-length signals.
772 if (signal->getLength() == CmRegReq::SignalLength)
775 gci = cmRegReq->latest_gci;
776 start_type = cmRegReq->start_type;
777 skip_nodes.assign(NdbNodeBitmask::Size, cmRegReq->skip_nodes);
780 if (startingVersion < NDBD_SPLIT_VERSION)
782 startingMysqlVersion = 0;
785 if (creadyDistCom == ZFALSE) {
// Version / feature compatibility checks.
791 if (!ndbCompatible_ndb_ndb(NDB_VERSION, startingVersion)) {
793 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_VERSION);
797 if (!ndb_check_micro_gcp(startingVersion))
800 infoEvent(
"Connection from node %u refused as it's not micro GCP enabled",
802 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_VERSION);
806 if (!ndb_pnr(startingVersion))
809 infoEvent(
"Connection from node %u refused as it's not does not support "
810 "parallel node recovery",
812 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_VERSION);
816 if (!ndb_check_hb_order_version(startingVersion) &&
817 m_hb_order_config_used)
820 infoEvent(
"Connection from node %u refused as it does not support "
821 "user-defined HeartbeatOrder",
823 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_VERSION);
827 if (m_connectivity_check.m_enabled &&
828 !ndbd_connectivity_check(startingVersion))
831 infoEvent(
"Connection from node %u refused as it does not support "
832 "ConnectCheckIntervalDelay",
834 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_VERSION);
838 if (check_start_type(start_type, c_start.m_start_type))
841 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_START_TYPE);
// We are not president.
845 if (cpresident != getOwnNodeId())
// No president elected yet: keep the best candidate seen so far, preferring
// a higher GCI and, on equal GCI, the lower node id.
849 if (cpresident == ZNIL)
858 if (gci > c_start.m_president_candidate_gci ||
859 (gci == c_start.m_president_candidate_gci &&
860 addNodePtr.i < c_start.m_president_candidate))
863 c_start.m_president_candidate = addNodePtr.i;
864 c_start.m_president_candidate_gci = gci;
866 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZELECTION);
875 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZNOT_PRESIDENT);
// President, but another node start is already in progress.
879 if (c_start.m_startNode != 0)
885 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZBUSY_PRESIDENT);
889 if (ctoStatus == Q_ACTIVE)
895 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZBUSY_TO_PRES);
905 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZNOT_IN_CFG);
918 unsigned int get_major = getMajor(startingVersion);
919 unsigned int get_minor = getMinor(startingVersion);
920 unsigned int get_build = getBuild(startingVersion);
922 if (startingVersion < NDBD_QMGR_SINGLEUSER_VERSION_5) {
925 infoEvent(
"QMGR: detect upgrade: new node %u old version %u.%u.%u",
926 (
unsigned int)addNodePtr.i, get_major, get_minor, get_build);
931 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_VERSION);
935 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZSINGLE_USER_MODE);
942 ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec);
943 Phase phase = addNodePtr.p->phase;
947 DEBUG(
"phase = " << phase);
948 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZNOT_DEAD);
// Accept: bump the start key and record which node is being started.
966 c_start.m_startKey++;
967 c_start.m_startNode = addNodePtr.i;
// Dynamic id: low 16 bits from the incremented counter, hbOrder shifted
// into the bits above.
972 UintR TdynId = (++c_maxDynamicId) & 0xFFFF;
973 TdynId |= (addNodePtr.p->hbOrder << 16);
974 setNodeInfo(addNodePtr.i).
m_version = startingVersion;
977 addNodePtr.p->ndynamicId = TdynId;
983 cmRegConf->presidentBlockRef = reference();
984 cmRegConf->presidentNodeId = getOwnNodeId();
987 cmRegConf->dynamicId = TdynId;
988 c_clusterNodes.
copyto(NdbNodeBitmask::Size, cmRegConf->allNdbNodes);
989 sendSignal(Tblockref, GSN_CM_REGCONF, signal,
990 CmRegConf::SignalLength, JBA);
991 DEBUG_START(GSN_CM_REGCONF, refToNode(Tblockref),
"");
// Wait for the current cluster nodes to acknowledge the CM_ADD(Prepare).
996 c_start.m_nodes = c_clusterNodes;
998 c_start.m_gsn = GSN_CM_ADD;
1001 CmAdd *
const cmAdd = (
CmAdd*)signal->getDataPtrSend();
1002 cmAdd->requestType = CmAdd::Prepare;
1003 cmAdd->startingNodeId = addNodePtr.i;
1004 cmAdd->startingVersion = startingVersion;
1005 cmAdd->startingMysqlVersion = startingMysqlVersion;
1006 sendSignal(rg, GSN_CM_ADD, signal, CmAdd::SignalLength, JBA);
1007 DEBUG_START2(GSN_CM_ADD, rg,
"Prepare");
// NOTE(review): only two words are sent here, while execCONTINUEB compares
// theData[2] with c_start.m_startNode for ZREGREQ_MASTER_TIMELIMIT - confirm
// whether theData[2] should be set and the length be 3 (compare the
// ZREGREQ_TIMELIMIT send in cmInfoconf010Lab).
1013 signal->theData[0] = ZREGREQ_MASTER_TIMELIMIT;
1014 signal->theData[1] = c_start.m_startKey;
1015 sendSignalWithDelay(QMGR_REF, GSN_CONTINUEB, signal, 30000, 2);
// Sends CM_REGREF with error code Terror to TBRef. Besides the error code the
// reply carries this block's reference, own node id, the president (or the
// current president candidate when no president is known), the candidate's
// GCI, our own latest GCI, our start type and the skip-node mask.
1020 void Qmgr::sendCmRegrefLab(
Signal* signal, BlockReference TBRef,
1021 CmRegRef::ErrorCode Terror)
1024 ref->blockRef = reference();
1025 ref->nodeId = getOwnNodeId();
1026 ref->errorCode = Terror;
1027 ref->presidentCandidate =
1028 (cpresident == ZNIL ? c_start.m_president_candidate : cpresident);
1029 ref->candidate_latest_gci = c_start.m_president_candidate_gci;
1030 ref->latest_gci = c_start.m_latest_gci;
1031 ref->start_type = c_start.m_start_type;
1032 c_start.m_skip_nodes.
copyto(NdbNodeBitmask::Size, ref->skip_nodes);
1033 sendSignal(TBRef, GSN_CM_REGREF, signal,
1034 CmRegRef::SignalLength, JBB);
1035 DEBUG_START(GSN_CM_REGREF, refToNode(TBRef),
"");
// CM_REGCONF handler: the president accepted our registration.
// Checks the president's version, adopts the president and dynamic id from
// the signal, reports via EVENT_REP, marks the cluster nodes as ZRUNNING and
// sends CM_NODEINFOREQ to those already connected.
// NOTE(review): buffer declarations and the formatting calls that fill `buf`
// are on lines not visible in this excerpt.
1051 void Qmgr::execCM_REGCONF(
Signal* signal)
1053 DEBUG_START3(signal,
"");
1055 NodeRecPtr myNodePtr;
// Incompatible president version: terminate via progError.
1061 if (!ndbCompatible_ndb_ndb(NDB_VERSION, cmRegConf->presidentVersion)) {
1065 "incompatible version own=0x%x other=0x%x, "
1067 NDB_VERSION, cmRegConf->presidentVersion);
1068 progError(__LINE__, NDBD_EXIT_UNSUPPORTED_VERSION, buf);
1072 if (!ndb_check_hb_order_version(cmRegConf->presidentVersion) &&
1073 m_hb_order_config_used) {
1077 "incompatible version own=0x%x other=0x%x, "
1078 "due to user-defined HeartbeatOrder, shutting down",
1079 NDB_VERSION, cmRegConf->presidentVersion);
1080 progError(__LINE__, NDBD_EXIT_UNSUPPORTED_VERSION, buf);
// A president without connectivity-check support only disables the feature
// locally; no progError call is visible on this path.
1084 if (m_connectivity_check.m_enabled &&
1085 !ndbd_connectivity_check(cmRegConf->presidentVersion))
1088 m_connectivity_check.m_enabled =
false;
1089 ndbout_c(
"Disabling ConnectCheckIntervalDelay as president "
1090 " does not support it");
1091 infoEvent(
"Disabling ConnectCheckIntervalDelay as president "
1092 " does not support it");
1095 myNodePtr.i = getOwnNodeId();
1096 ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
1098 ndbrequire(c_start.m_gsn == GSN_CM_REGREQ);
1099 ndbrequire(myNodePtr.p->phase == ZSTARTING);
// Adopt the president, our dynamic id (counter = low 16 bits) and the
// cluster node set from the signal.
1101 cpdistref = cmRegConf->presidentBlockRef;
1102 cpresident = cmRegConf->presidentNodeId;
1103 UintR TdynamicId = cmRegConf->dynamicId;
1104 c_maxDynamicId = TdynamicId & 0xFFFF;
1105 c_clusterNodes.
assign(NdbNodeBitmask::Size, cmRegConf->allNdbNodes);
1107 myNodePtr.p->ndynamicId = TdynamicId;
1110 setNodeInfo(getOwnNodeId()).
m_lqh_workers = globalData.ndbMtLqhWorkers;
1117 signal->theData[1] = getOwnNodeId();
1118 signal->theData[2] = cpresident;
1119 signal->theData[3] = TdynamicId;
1120 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
// Every cluster node must still be in ZINIT; it becomes ZRUNNING here.
1122 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
1124 if (c_clusterNodes.
get(nodePtr.i)){
1126 ptrAss(nodePtr, nodeRec);
1128 ndbrequire(nodePtr.p->phase == ZINIT);
1129 nodePtr.p->phase = ZRUNNING;
1131 if(c_connectedNodes.
get(nodePtr.i)){
1133 sendCmNodeInfoReq(signal, nodePtr.i, myNodePtr.p);
// Next step: wait for node info from all cluster nodes.
1138 c_start.m_gsn = GSN_CM_NODEINFOREQ;
1139 c_start.m_nodes = c_clusterNodes;
// Error insert 937: clear the insert and send a delayed NDB_TAMPER(9999).
1141 if (ERROR_INSERTED(937))
1143 CLEAR_ERROR_INSERT_VALUE;
1144 signal->theData[0] = 9999;
1145 sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 500, 1);
// Evaluates the READ_NODES reply (`gsn` is the reply signal number) received
// from nodeId. A REF leads to the request being re-sent. A CONF naming a
// different president than ours is treated as a partitioned cluster:
// FAIL_REP(ZPARTITIONED_CLUSTER) signals are sent to the other node's QMGR
// and a partitioned-shutdown exit code is used.
// NOTE(review): returns, several declarations (nodePtr, conf, rep, part, i,
// buf) and the final error call are on lines not visible in this excerpt.
1152 Qmgr::check_readnodes_reply(
Signal* signal, Uint32 nodeId, Uint32 gsn)
1154 NodeRecPtr myNodePtr;
1155 myNodePtr.i = getOwnNodeId();
1156 ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
1160 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
1164 if (gsn == GSN_READ_NODESREF)
1168 signal->theData[0] = reference();
1169 sendSignal(calcQmgrBlockRef(nodeId), GSN_READ_NODESREQ, signal, 1, JBA);
1173 if (conf->masterNodeId == ZNIL)
1179 Uint32 president = conf->masterNodeId;
1180 if (president == cpresident)
1189 "check StartPartialTimeout, "
1190 "node %d thinks %d is president, "
1191 "I think president is: %d",
1192 nodeId, president, cpresident);
1194 ndbout_c(
"%s", buf);
1195 CRASH_INSERTION(933);
// `part` holds the other side's view of the cluster; the FAIL_REP payload
// carries our president and our own cluster node set.
1201 part.
assign(NdbNodeBitmask::Size, conf->clusterNodes);
1203 rep->failCause = FailRep::ZPARTITIONED_CLUSTER;
1204 rep->partitioned.president = cpresident;
1205 c_clusterNodes.
copyto(NdbNodeBitmask::Size, rep->partitioned.partition);
1206 rep->partitioned.partitionFailSourceNodeId = getOwnNodeId();
1207 Uint32 ref = calcQmgrBlockRef(nodeId);
1210 Uint32 length = FailRep::OrigSignalLength + FailRep::PartitionedExtraLength;
// One FAIL_REP per node in `part`; the source-node extension is appended
// only when ndbd_fail_rep_source_node() accepts that node's version.
1211 while((i = part.
find(i + 1)) != NdbNodeBitmask::NotFound)
1215 rep->failNodeId =
i;
1216 bool sendSourceId = ndbd_fail_rep_source_node((
getNodeInfo(i)).m_version);
1217 sendSignal(ref, GSN_FAIL_REP, signal,
1218 length + (sendSourceId ? FailRep::SourceExtraLength : 0),
1221 rep->failNodeId = nodeId;
1222 bool sendSourceId = ndbd_fail_rep_source_node((
getNodeInfo(nodeId)).m_version);
1224 sendSignal(ref, GSN_FAIL_REP, signal,
1225 length + (sendSourceId ? FailRep::SourceExtraLength : 0),
1230 CRASH_INSERTION(932);
1231 CRASH_INSERTION(938);
1234 NDBD_EXIT_PARTITIONED_SHUTDOWN,
// Sends CM_NODEINFOREQ to the QMGR block of nodeId, filling in our node id
// and the dynamic id taken from `self` (this node's own record).
// NOTE(review): further request fields may be set on lines not visible here.
1241 Qmgr::sendCmNodeInfoReq(
Signal* signal, Uint32 nodeId,
const NodeRec *
self){
1243 req->nodeId = getOwnNodeId();
1244 req->dynamicId =
self->ndynamicId;
1248 const Uint32 ref = calcQmgrBlockRef(nodeId);
1249 sendSignal(ref,GSN_CM_NODEINFOREQ, signal, CmNodeInfoReq::SignalLength, JBB);
1250 DEBUG_START(GSN_CM_NODEINFOREQ, nodeId,
"");
// Builds a textual description of the start-type value `st` by appending a
// name per start type into a function-local static buffer.
// NOTE(review): `buf` is static, so the result storage is shared between
// calls. If the function returns `buf` (the return is not visible here), the
// two calls made inside a single argument list in execCM_REGREF would refer
// to the same storage - confirm.
1273 get_start_type_string(Uint32 st)
1275 static char buf[256];
1284 for(Uint32 i = 0; i<NodeState::ST_ILLEGAL_TYPE; i++)
1291 case NodeState::ST_INITIAL_START:
1292 strcat(buf,
"inital start");
1294 case NodeState::ST_SYSTEM_RESTART:
1295 strcat(buf,
"system restart");
1297 case NodeState::ST_NODE_RESTART:
1298 strcat(buf,
"node restart");
1300 case NodeState::ST_INITIAL_NODE_RESTART:
1301 strcat(buf,
"initial node restart");
// CM_REGREF handler: a node refused our CM_REGREQ. Records the refusing
// node's data, acts on the refusal reason (several reasons end in progError),
// reports via EVENT_REP with a code in theData[3], and calls electionWon when
// all replies are in, we are the candidate and check_startup() succeeds.
// NOTE(review): returns/breaks, some guards and the declarations of `ref`,
// `skip_nodes` and `buf` are on lines not visible in this excerpt.
1310 void Qmgr::execCM_REGREF(
Signal* signal)
1315 UintR TaddNodeno = ref->nodeId;
1316 UintR TrefuseReason = ref->errorCode;
1317 Uint32 candidate = ref->presidentCandidate;
1318 Uint32 node_gci = 1;
1319 Uint32 candidate_gci = 1;
1320 Uint32 start_type = ~0;
1322 DEBUG_START3(signal, TrefuseReason);
// GCIs, start type and skip mask are taken only from full-length signals;
// otherwise the defaults above apply.
1324 if (signal->getLength() == CmRegRef::SignalLength)
1327 node_gci = ref->latest_gci;
1328 candidate_gci = ref->candidate_latest_gci;
1329 start_type = ref->start_type;
1330 skip_nodes.
assign(NdbNodeBitmask::Size, ref->skip_nodes);
1333 c_start.m_regReqReqRecv++;
// A different candidate than ours sets the receive counter to ~0, which
// makes the sent/received equality test further down fail.
1337 if(candidate != c_start.m_president_candidate)
1340 c_start.m_regReqReqRecv = ~0;
1343 c_start.m_starting_nodes.
set(TaddNodeno);
1347 c_start.m_starting_nodes_w_log.
set(TaddNodeno);
1349 c_start.m_node_gci[TaddNodeno] = node_gci;
// Merge the sender's skip mask (restricted to defined nodes) into ours.
1351 skip_nodes.
bitAND(c_definedNodes);
1352 c_start.m_skip_nodes.
bitOR(skip_nodes);
1355 setNodeInfo(getOwnNodeId()).
m_lqh_workers = globalData.ndbMtLqhWorkers;
1358 switch (TrefuseReason) {
1359 case CmRegRef::ZINCOMPATIBLE_VERSION:
1361 progError(__LINE__, NDBD_EXIT_UNSUPPORTED_VERSION,
1362 "incompatible version, "
1363 "connection refused by running ndb node");
1364 case CmRegRef::ZINCOMPATIBLE_START_TYPE:
1367 "incompatible start type detected: node %d"
1368 " reports %s(%d) my start type: %s(%d)",
1370 get_start_type_string(start_type), start_type,
1371 get_start_type_string(c_start.m_start_type),
1372 c_start.m_start_type);
1373 progError(__LINE__, NDBD_EXIT_SR_RESTARTCONFLICT, buf);
1375 case CmRegRef::ZBUSY:
1376 case CmRegRef::ZBUSY_TO_PRES:
1377 case CmRegRef::ZBUSY_PRESIDENT:
1379 cpresidentAlive = ZTRUE;
1380 signal->theData[3] = 0;
1382 case CmRegRef::ZNOT_IN_CFG:
1384 progError(__LINE__, NDBD_EXIT_NODE_NOT_IN_CONFIG);
1386 case CmRegRef::ZNOT_DEAD:
1388 progError(__LINE__, NDBD_EXIT_NODE_NOT_DEAD);
1390 case CmRegRef::ZSINGLE_USER_MODE:
1392 progError(__LINE__, NDBD_EXIT_SINGLE_USER_MODE);
1399 case CmRegRef::ZGENERIC:
// Election in progress: adopt the sender's candidate when it has a higher
// GCI, or the same GCI and a lower node id.
1403 case CmRegRef::ZELECTION:
1405 if (candidate_gci > c_start.m_president_candidate_gci ||
1406 (candidate_gci == c_start.m_president_candidate_gci &&
1407 candidate < c_start.m_president_candidate))
1414 signal->theData[3] = 2;
1415 c_start.m_president_candidate = candidate;
1416 c_start.m_president_candidate_gci = candidate_gci;
1418 signal->theData[3] = 4;
1421 case CmRegRef::ZNOT_PRESIDENT:
1423 cpresidentAlive = ZTRUE;
1424 signal->theData[3] = 3;
1428 signal->theData[3] = 5;
1437 signal->theData[1] = getOwnNodeId();
1438 signal->theData[2] = TaddNodeno;
1442 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
// Conditions checked before an election win is declared.
1444 if(cpresidentAlive == ZTRUE)
1447 DEBUG(
"cpresidentAlive");
1451 if(c_start.m_regReqReqSent != c_start.m_regReqReqRecv)
1454 DEBUG(c_start.m_regReqReqSent <<
" != " << c_start.m_regReqReqRecv);
1458 if(c_start.m_president_candidate != getOwnNodeId())
1461 DEBUG(
"i'm not the candidate");
1468 if(check_startup(signal))
1471 electionWon(signal);
// Decides whether the set of nodes seen so far is sufficient to start.
// Inputs are the defined / starting / skipped / no-nodegroup node sets in
// c_start and three deadlines derived from c_start_election_time:
//   partial_timeout      = election time + c_restartPartialTimeout
//   partitioned_timeout  = partial_timeout + c_restartPartionedTimeout
//   no_nodegroup_timeout = election time + c_restartNoNodegroupTimeout
// DBDIH is consulted synchronously (CheckNodeGroups results, DIH_RESTARTREQ).
// A report code is placed in theData[1] (and for waiting states a value
// computed from the remaining time in theData[2]) and sent as EVENT_REP.
// Fatal outcomes end in progError(NDBD_EXIT_INSUFFICENT_NODES).
// NOTE(review): labels, gotos' targets, returns, `retVal`, several
// declarations and the formatting calls are on lines not visible in this
// excerpt; the meaning of individual report codes is not stated here.
1478 Qmgr::check_startup(
Signal* signal)
1480 Uint64 now = NdbTick_CurrentMillisecond();
1481 Uint64 partial_timeout = c_start_election_time + c_restartPartialTimeout;
1482 Uint64 partitioned_timeout = partial_timeout + c_restartPartionedTimeout;
1483 Uint64 no_nodegroup_timeout = c_start_election_time +
1484 c_restartNoNodegroupTimeout;
// The no-nodegroup wait applies only when its timeout is not ~0 and at
// least one no-nodegroup node exists.
1486 const bool no_nodegroup_active =
1487 (c_restartNoNodegroupTimeout != ~Uint32(0)) &&
1488 (! c_start.m_no_nodegroup_nodes.
isclear());
// tmp accumulates the nodes that are accounted for (skipped or starting).
1494 tmp.
bitOR(c_start.m_skip_nodes);
1495 tmp.
bitOR(c_start.m_starting_nodes);
1498 wait.
assign(c_definedNodes);
// MAX_NDB_NODES serves as the "no incomplete node group" marker.
1502 Uint32 incompleteng = MAX_NDB_NODES;
// Branch for latest GCI 0 or a start type of exactly initial start.
1505 if ((c_start.m_latest_gci == 0) ||
1506 (c_start.m_start_type == (1 << NodeState::ST_INITIAL_START)))
1508 if (tmp.
equal(c_definedNodes))
1511 signal->theData[1] = 0x8000;
1512 report_mask.
assign(c_definedNodes);
1513 report_mask.
bitANDC(c_start.m_starting_nodes);
1517 else if (no_nodegroup_active)
1519 if (now < no_nodegroup_timeout)
1521 signal->theData[1] = 6;
1522 signal->theData[2] = Uint32((no_nodegroup_timeout - now + 500) / 1000);
1523 report_mask.
assign(wait);
1527 tmp.
bitOR(c_start.m_no_nodegroup_nodes);
1528 if (tmp.
equal(c_definedNodes))
1530 signal->theData[1] = 0x8000;
1531 report_mask.
assign(c_definedNodes);
1532 report_mask.
bitANDC(c_start.m_starting_nodes);
1539 signal->theData[1] = 1;
1540 signal->theData[2] = ~0;
1541 report_mask.
assign(wait);
1549 signal->theData[1] = 1;
1550 signal->theData[2] = ~0;
1551 report_mask.
assign(wait);
// After the no-nodegroup deadline those nodes count as accounted for.
1557 if (now >= no_nodegroup_timeout)
1559 tmp.
bitOR(c_start.m_no_nodegroup_nodes);
1563 const bool all = c_start.m_starting_nodes.
equal(c_definedNodes);
// First node-group check on: defined nodes minus starting nodes, plus the
// starting nodes that have a log.
1571 check.assign(c_definedNodes);
1572 check.bitANDC(c_start.m_starting_nodes);
1573 check.bitOR(c_start.m_starting_nodes_w_log);
1575 sd->blockRef = reference();
1576 sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck;
1579 CheckNodeGroups::SignalLength);
1581 if (sd->output == CheckNodeGroups::Lose)
1584 goto missing_nodegroup;
// result: node-group check over all starting nodes.
1588 sd->blockRef = reference();
1589 sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck;
1590 sd->mask = c_start.m_starting_nodes;
1592 CheckNodeGroups::SignalLength);
1594 const Uint32 result = sd->output;
// result_w_log: the same check restricted to starting nodes with a log.
1596 sd->blockRef = reference();
1597 sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck;
1598 sd->mask = c_start.m_starting_nodes_w_log;
1600 CheckNodeGroups::SignalLength);
1602 const Uint32 result_w_log = sd->output;
// Every defined node is accounted for.
1604 if (tmp.
equal(c_definedNodes))
1611 switch(result_w_log){
1612 case CheckNodeGroups::Lose:
1615 goto missing_nodegroup;
1617 case CheckNodeGroups::Win:
1618 signal->theData[1] = all ? 0x8001 : 0x8002;
1619 report_mask.
assign(c_definedNodes);
1620 report_mask.
bitANDC(c_start.m_starting_nodes);
1623 case CheckNodeGroups::Partitioning:
1624 ndbrequire(result != CheckNodeGroups::Lose);
1625 signal->theData[1] =
1626 all ? 0x8001 : (result == CheckNodeGroups::Win ? 0x8002 : 0x8003);
1627 report_mask.
assign(c_definedNodes);
1628 report_mask.
bitANDC(c_start.m_starting_nodes);
// Still inside the partial-start window.
1634 if (now < partial_timeout)
1638 signal->theData[1] = c_restartPartialTimeout == (Uint32) ~0 ? 2 : 3;
1639 signal->theData[2] = Uint32((partial_timeout - now + 500) / 1000);
1640 report_mask.
assign(wait);
1643 if (no_nodegroup_active && now < no_nodegroup_timeout)
1645 signal->theData[1] = 7;
1646 signal->theData[2] = Uint32((no_nodegroup_timeout - now + 500) / 1000);
1648 else if (no_nodegroup_active && now >= no_nodegroup_timeout)
1650 report_mask.
bitANDC(c_start.m_no_nodegroup_nodes);
// Past the partial-start window.
1659 switch(result_w_log){
1660 case CheckNodeGroups::Lose:
1662 goto missing_nodegroup;
1663 case CheckNodeGroups::Partitioning:
1664 if (now < partitioned_timeout && result != CheckNodeGroups::Win)
1669 case CheckNodeGroups::Win:
1670 signal->theData[1] =
1671 all ? 0x8001 : (result == CheckNodeGroups::Win ? 0x8002 : 0x8003);
1672 report_mask.
assign(c_definedNodes);
1673 report_mask.
bitANDC(c_start.m_starting_nodes);
// The direct DIH_RESTARTREQ call reuses the signal buffer, so the report
// words are saved before and restored after it.
1683 Uint32 save[4+4*NdbNodeBitmask::Size];
1684 memcpy(save, signal->theData,
sizeof(save));
1688 c_start.m_starting_nodes.copyto(NdbNodeBitmask::Size, req->nodemask);
1689 memcpy(req->node_gcis, c_start.m_node_gci, 4*MAX_NDB_NODES);
1690 EXECUTE_DIRECT(DBDIH, GSN_DIH_RESTARTREQ, signal,
1691 DihRestartReq::CheckLength);
1693 incompleteng = signal->theData[0];
1694 memcpy(signal->theData, save,
sizeof(save));
1696 if (incompleteng != MAX_NDB_NODES)
1702 goto incomplete_log;
1704 else if (retVal == 2)
1706 if (now <= partitioned_timeout)
1713 goto incomplete_log;
1722 signal->theData[1] = c_restartPartionedTimeout == (Uint32) ~0 ? 4 : 5;
1723 signal->theData[2] = Uint32((partitioned_timeout - now + 500) / 1000);
1724 report_mask.
assign(c_definedNodes);
1725 report_mask.
bitANDC(c_start.m_starting_nodes);
// Report layout after word 3 (mask size): defined, starting, skip, report
// and no-nodegroup masks, each `sz` words.
1732 Uint32 sz = NdbNodeBitmask::Size;
1734 signal->theData[3] = sz;
1735 Uint32* ptr = signal->theData+4;
1736 c_definedNodes.copyto(sz, ptr); ptr += sz;
1737 c_start.m_starting_nodes.copyto(sz, ptr); ptr += sz;
1738 c_start.m_skip_nodes.copyto(sz, ptr); ptr += sz;
1739 report_mask.
copyto(sz, ptr); ptr+= sz;
1740 c_start.m_no_nodegroup_nodes.copyto(sz, ptr); ptr += sz;
1741 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal,
1742 4+5*NdbNodeBitmask::Size, JBB);
// Fatal: a node group is missing. tmp becomes the starting nodes that lack
// a log.
1749 char buf[100], mask1[100], mask2[100];
1750 c_start.m_starting_nodes.getText(mask1);
1751 tmp.
assign(c_start.m_starting_nodes);
1752 tmp.
bitANDC(c_start.m_starting_nodes_w_log);
1755 "Unable to start missing node group! "
1756 " starting: %s (missing fs for: %s)",
1758 progError(__LINE__, NDBD_EXIT_INSUFFICENT_NODES, buf);
// Fatal: DBDIH reported a node group with an incomplete log.
1765 char buf[100], mask1[100];
1766 c_start.m_starting_nodes.getText(mask1);
1768 "Incomplete log for node group: %d! "
1769 " starting nodes: %s",
1770 incompleteng, mask1);
1771 progError(__LINE__, NDBD_EXIT_INSUFFICENT_NODES, buf);
// Makes this node the president: sets cpresident and cpdistref to ourselves,
// moves our own record to ZRUNNING, resets the cluster node set to contain
// only this node, and reports the result via EVENT_REP.
1777 Qmgr::electionWon(
Signal* signal){
1778 NodeRecPtr myNodePtr;
1779 cpresident = getOwnNodeId();
1780 myNodePtr.i = getOwnNodeId();
1781 ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
1783 myNodePtr.p->phase = ZRUNNING;
1785 cpdistref = reference();
// Dynamic id: 1 in the low bits, hbOrder shifted left by 16.
1788 myNodePtr.p->ndynamicId = 1 | (myNodePtr.p->hbOrder << 16);
1790 c_clusterNodes.
clear();
1791 c_clusterNodes.
set(getOwnNodeId());
1793 cpresidentAlive = ZTRUE;
// NOTE(review): presumably ~0 disables the ZSTART_FAILURE_LIMIT comparison in
// execCONTINUEB; that case also tests cpresident != ZNIL first - confirm.
1794 c_start_election_time = ~0;
1798 signal->theData[1] = getOwnNodeId();
1799 signal->theData[2] = cpresident;
1800 signal->theData[3] = myNodePtr.p->ndynamicId;
1801 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
// Remove ourselves from the starting set; STTORRY is sent on the path
// following the emptiness test.
// NOTE(review): braces are not visible, so whether sendSttorryLab is
// conditional on the set being empty cannot be confirmed from here.
1803 c_start.m_starting_nodes.
clear(getOwnNodeId());
1804 if (c_start.m_starting_nodes.
isclear())
1807 sendSttorryLab(signal);
// Called from execCONTINUEB when the ZREGREQ_TIMELIMIT timer fires. With no
// president known, this node nominates itself as candidate if there is none
// yet, and a new registration round is started through cmInfoconf010Lab.
// NOTE(review): braces are not visible, so the exact nesting of the
// cmInfoconf010Lab call cannot be confirmed from here.
1820 void Qmgr::regreqTimeLimitLab(
Signal* signal)
1822 if(cpresident == ZNIL)
1824 if (c_start.m_president_candidate == ZNIL)
1827 c_start.m_president_candidate = getOwnNodeId();
1830 cmInfoconf010Lab(signal);
// CM_NODEINFOCONF handler: a cluster node answered our CM_NODEINFOREQ.
// Requires that we are ZSTARTING and in the CM_NODEINFOREQ step, stops
// waiting for the sender, stores its dynamic id, block reference and version,
// and - once no more replies are outstanding - acknowledges the prepare
// step to the president via sendCmAckAdd.
// NOTE(review): the bodies of the two version checks and the storage of
// mysql_version / lqh_workers are on lines not visible in this excerpt.
1845 void Qmgr::execCM_NODEINFOCONF(
Signal* signal)
1847 DEBUG_START3(signal,
"");
1853 const Uint32 nodeId = conf->nodeId;
1854 const Uint32 dynamicId = conf->dynamicId;
1855 const Uint32 version = conf->version;
1856 Uint32 mysql_version = conf->mysql_version;
1857 Uint32 lqh_workers = conf->lqh_workers;
1858 if (version < NDBD_SPLIT_VERSION)
1863 if (version < NDBD_MT_LQH_VERSION)
1870 nodePtr.i = getOwnNodeId();
1871 ptrAss(nodePtr, nodeRec);
1872 ndbrequire(nodePtr.p->phase == ZSTARTING);
1873 ndbrequire(c_start.m_gsn == GSN_CM_NODEINFOREQ);
1874 c_start.m_nodes.clearWaitingFor(nodeId);
1879 NodeRecPtr replyNodePtr;
1880 replyNodePtr.i = nodeId;
1881 ptrCheckGuard(replyNodePtr, MAX_NDB_NODES, nodeRec);
1882 replyNodePtr.p->ndynamicId = dynamicId;
1883 replyNodePtr.p->blockRef = signal->getSendersBlockRef();
1884 setNodeInfo(replyNodePtr.i).
m_version = version;
1890 if(!c_start.m_nodes.done()){
1904 sendCmAckAdd(signal, getOwnNodeId(), CmAdd::Prepare);
// CM_NODEINFOREQ handler: a starting node asks for our node info.
// If we are not ZRUNNING, CM_NODEINFOREF(ZNOT_RUNNING) is returned.
// Otherwise the sender's dynamic id, block reference and version are stored,
// c_maxDynamicId is updated from the low 16 bits of its dynamic id, and
// cmAddPrepare is invoked for it.
// NOTE(review): the bodies of the two version checks and the storage of
// mysql_version / lqh_workers are on lines not visible in this excerpt.
1915 void Qmgr::execCM_NODEINFOREQ(
Signal* signal)
1919 const Uint32 Tblockref = signal->getSendersBlockRef();
1922 nodePtr.i = getOwnNodeId();
1923 ptrAss(nodePtr, nodeRec);
1924 if(nodePtr.p->phase != ZRUNNING){
1926 signal->theData[0] = reference();
1927 signal->theData[1] = getOwnNodeId();
1928 signal->theData[2] = ZNOT_RUNNING;
1929 sendSignal(Tblockref, GSN_CM_NODEINFOREF, signal, 3, JBB);
1933 NodeRecPtr addNodePtr;
1935 addNodePtr.i = req->nodeId;
1936 ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec);
1937 addNodePtr.p->ndynamicId = req->dynamicId;
1938 addNodePtr.p->blockRef = signal->getSendersBlockRef();
1939 setNodeInfo(addNodePtr.i).
m_version = req->version;
1941 Uint32 mysql_version = req->mysql_version;
1942 if (req->version < NDBD_SPLIT_VERSION)
1946 Uint32 lqh_workers = req->lqh_workers;
1947 if (req->version < NDBD_MT_LQH_VERSION)
1951 c_maxDynamicId = req->dynamicId & 0xFFFF;
1953 cmAddPrepare(signal, addNodePtr, nodePtr.p);
// Prepare step for adding the node in nodePtr; `self` is this node's own
// record. Depending on the node's current phase it is moved to ZSTARTING,
// or a warning is logged and communication to it is re-opened. The step
// ends with CM_ACKADD(Prepare) to the president, a version report to the new
// node, and CM_NODEINFOCONF sent to the node's stored block reference.
// NOTE(review): the case labels of the phase switch, returns and some
// arguments are on lines not visible in this excerpt.
1957 Qmgr::cmAddPrepare(
Signal* signal, NodeRecPtr nodePtr,
const NodeRec *
self){
1960 switch(nodePtr.p->phase){
1963 nodePtr.p->phase = ZSTARTING;
1969 warningEvent(
"Recieved request to incorperate node %u, "
1970 "while error handling has not yet completed",
// Only a non-president handling CM_ADD may reach this path; the start step
// switches to waiting for CM_NODEINFOCONF.
1973 ndbrequire(getOwnNodeId() != cpresident);
1974 ndbrequire(signal->header.theVerId_signalNumber == GSN_CM_ADD);
1975 c_start.m_nodes.clearWaitingFor();
1977 c_start.m_gsn = GSN_CM_NODEINFOCONF;
1979 warningEvent(
"Enabling communication to CM_ADD node %u state=%d",
1982 nodePtr.p->phase = ZSTARTING;
1983 nodePtr.p->failState = NORMAL;
1984 signal->theData[0] = 0;
1985 signal->theData[1] = nodePtr.i;
1986 sendSignal(CMVMI_REF, GSN_OPEN_COMREQ, signal, 2, JBA);
1998 sendCmAckAdd(signal, nodePtr.i, CmAdd::Prepare);
1999 sendApiVersionRep(signal, nodePtr);
2003 conf->nodeId = getOwnNodeId();
2004 conf->dynamicId =
self->ndynamicId;
2008 sendSignal(nodePtr.p->blockRef, GSN_CM_NODEINFOCONF, signal,
2009 CmNodeInfoConf::SignalLength, JBB);
2010 DEBUG_START(GSN_CM_NODEINFOCONF, refToNode(nodePtr.p->blockRef),
"");
// Sends NODE_VERSION_REP signals to the QMGR block on node nodePtr.i.
// The loop runs i over 1..MAX_NODES-1; each signal sent carries
// theData[0] = i and theData[1] = version.
// NOTE(review): how `version` is obtained and which values of i are
// filtered out is not visible in this excerpt.
2014 Qmgr::sendApiVersionRep(
Signal* signal, NodeRecPtr nodePtr)
2019 Uint32 ref = calcQmgrBlockRef(nodePtr.i);
2020 for(Uint32 i = 1; i<MAX_NODES; i++)
2028 signal->theData[0] =
i;
2029 signal->theData[1] = version;
2030 sendSignal(ref, GSN_NODE_VERSION_REP, signal, 2, JBB);
// Builds a CmAckAdd (requestType = type, startingNodeId = nodeId,
// senderNodeId = own node id) and sends it as CM_ACKADD to cpdistref on
// job buffer level JBA.
// Case labels for CmAdd::Prepare, CmAdd::AddCommit and CmAdd::CommitNew
// follow the send, and theData[0] is set to nodeId afterwards.
// NOTE(review): the bodies of those cases and what the final theData[0]
// assignment feeds are not visible in this excerpt.
2037 Qmgr::sendCmAckAdd(
Signal * signal, Uint32 nodeId, CmAdd::RequestType type){
2040 cmAckAdd->requestType =
type;
2041 cmAckAdd->startingNodeId = nodeId;
2042 cmAckAdd->senderNodeId = getOwnNodeId();
2043 sendSignal(cpdistref, GSN_CM_ACKADD, signal, CmAckAdd::SignalLength, JBA);
2044 DEBUG_START(GSN_CM_ACKADD, cpresident,
"");
2047 case CmAdd::Prepare:
2049 case CmAdd::AddCommit:
2050 case CmAdd::CommitNew:
2054 signal->theData[0] = nodeId;
// Handles an incoming CM_ADD signal.
// The request type and starting node id are read from the CmAdd payload.
// Two groups of case labels are visible:
//  - own node in phase ZSTARTING: the starting node must be this node;
//    Prepare requires c_start.m_gsn == GSN_CM_NODEINFOREQ, CommitNew calls
//    joinedCluster(); AddCommit also has a label.
//  - second group: Prepare calls cmAddPrepare(); AddCommit requires the
//    added node to be ZSTARTING, marks it ZRUNNING, reports the connect to
//    m_connectivity_check, adds it to c_clusterNodes, recomputes neighbours,
//    sends a heartbeat, resets hb_send_timer and asks CMVMI to enable
//    communication (ENABLE_COMREQ tagged ENABLE_COM_CM_ADD_COMMIT).
// NOTE(review): the excerpt omits lines; presumably the second group is the
// "own node not starting" path - confirm against the full source.
2073 void Qmgr::execCM_ADD(
Signal* signal)
2075 NodeRecPtr addNodePtr;
2079 nodePtr.i = getOwnNodeId();
2080 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
2082 CmAdd *
const cmAdd = (
CmAdd*)signal->getDataPtr();
2083 const CmAdd::RequestType type = (CmAdd::RequestType)cmAdd->requestType;
2084 addNodePtr.i = cmAdd->startingNodeId;
2086 ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec);
2088 DEBUG_START3(signal, type);
2090 if(nodePtr.p->phase == ZSTARTING){
2095 ndbrequire(addNodePtr.i == nodePtr.i);
2097 case CmAdd::Prepare:
2098 ndbrequire(c_start.m_gsn == GSN_CM_NODEINFOREQ);
2103 case CmAdd::CommitNew:
2107 joinedCluster(signal, addNodePtr);
2109 case CmAdd::AddCommit:
2115 case CmAdd::Prepare:
2116 cmAddPrepare(signal, addNodePtr, nodePtr.p);
2118 case CmAdd::AddCommit:{
2120 ndbrequire(addNodePtr.p->phase == ZSTARTING);
2121 addNodePtr.p->phase = ZRUNNING;
2122 m_connectivity_check.reportNodeConnect(addNodePtr.i);
2124 c_clusterNodes.
set(addNodePtr.i);
2125 findNeighbours(signal, __LINE__);
2131 sendHeartbeat(signal);
2132 hb_send_timer.
reset(0);
// Ask CMVMI to enable communication with exactly the added node.
2138 enableComReq->m_senderRef = reference();
2139 enableComReq->m_senderData = ENABLE_COM_CM_ADD_COMMIT;
2140 NodeBitmask::clear(enableComReq->m_nodeIds);
2141 NodeBitmask::set(enableComReq->m_nodeIds, addNodePtr.i);
2142 sendSignal(CMVMI_REF, GSN_ENABLE_COMREQ, signal,
2143 EnableComReq::SignalLength, JBA);
2146 case CmAdd::CommitNew:
// Continuation after communication has been enabled for an AddCommit
// (called from execENABLE_COMCONF for ENABLE_COM_CM_ADD_COMMIT).
// Sends the AddCommit acknowledgement for `node` via sendCmAckAdd().
// NOTE(review): the body of the "not president" branch is not visible here.
2154 Qmgr::handleEnableComAddCommit(
Signal *signal, Uint32 node)
2156 sendCmAckAdd(signal, node, CmAdd::AddCommit);
2157 if(getOwnNodeId() != cpresident){
// Handles ENABLE_COMCONF and dispatches on the m_senderData tag that was
// placed in the matching ENABLE_COMREQ:
//   ENABLE_COM_CM_ADD_COMMIT -> handleEnableComAddCommit(signal, node)
//   ENABLE_COM_CM_COMMIT_NEW -> handleEnableComCommitNew(signal)
//   ENABLE_COM_API_REGREQ    -> handleEnableComApiRegreq(signal, node)
// For the two per-node cases `node` is required to differ from
// NodeBitmask::NotFound.
// NOTE(review): how `node` is derived and the full second ndbrequire in
// each case are only partially visible in this excerpt.
2164 Qmgr::execENABLE_COMCONF(
Signal *signal)
2168 Uint32 state = enableComConf->m_senderData;
2175 case ENABLE_COM_CM_ADD_COMMIT:
2178 ndbrequire(node != NodeBitmask::NotFound);
2180 NodeBitmask::NotFound);
2181 handleEnableComAddCommit(signal, node);
2184 case ENABLE_COM_CM_COMMIT_NEW:
2186 handleEnableComCommitNew(signal);
2189 case ENABLE_COM_API_REGREQ:
2192 ndbrequire(node != NodeBitmask::NotFound);
2194 NodeBitmask::NotFound);
2195 handleEnableComApiRegreq(signal, node);
// Called from execCM_ADD on CmAdd::CommitNew for the starting node.
// Visible steps: mark the node ZRUNNING, recompute neighbours, add the node
// to c_clusterNodes, send a heartbeat and reset hb_send_timer. Then build
// an ENABLE_COMREQ (tag ENABLE_COM_CM_COMMIT_NEW) whose node mask contains
// every node in phase ZRUNNING other than this node; the request is sent
// to CMVMI under the "mask is not clear" check, and
// handleEnableComCommitNew() is called directly further down.
// Note that the by-value nodePtr parameter is reused as the loop cursor.
// NOTE(review): presumably the direct call is the else-branch for an empty
// mask - the surrounding braces are not visible in this excerpt.
2205 Qmgr::joinedCluster(
Signal* signal, NodeRecPtr nodePtr){
2211 nodePtr.p->phase = ZRUNNING;
2213 findNeighbours(signal, __LINE__);
2214 c_clusterNodes.
set(nodePtr.i);
2221 sendHeartbeat(signal);
2222 hb_send_timer.
reset(0);
2229 enableComReq->m_senderRef = reference();
2230 enableComReq->m_senderData = ENABLE_COM_CM_COMMIT_NEW;
2231 NodeBitmask::clear(enableComReq->m_nodeIds);
2232 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
2234 ptrAss(nodePtr, nodeRec);
2235 if ((nodePtr.p->phase == ZRUNNING) && (nodePtr.i != getOwnNodeId())) {
2241 NodeBitmask::set(enableComReq->m_nodeIds, nodePtr.i);
2245 if (!NodeBitmask::isclear(enableComReq->m_nodeIds))
2248 sendSignal(CMVMI_REF, GSN_ENABLE_COMREQ, signal,
2249 EnableComReq::SignalLength, JBA);
2253 handleEnableComCommitNew(signal);
// Continuation of the CommitNew step: calls sendSttorryLab() and then
// acknowledges CmAdd::CommitNew for the own node via sendCmAckAdd().
2258 Qmgr::handleEnableComCommitNew(
Signal *signal)
2260 sendSttorryLab(signal);
2262 sendCmAckAdd(signal, getOwnNodeId(), CmAdd::CommitNew);
// Handles an incoming CM_ACKADD signal.
// Visible steps:
//  - read request type, starting node id and sender node id;
//  - log a warning when this node is not the president, and another when
//    the starting node differs from c_start.m_startNode;
//  - require c_start.m_gsn == GSN_CM_ADD, clear the sender from
//    c_start.m_nodes and check whether all acknowledgements are in;
//  - per request type:
//      Prepare   : reload c_start.m_nodes from c_clusterNodes and send
//                  CM_ADD(AddCommit) to the receiver group `rg`;
//      AddCommit : call c_start.m_nodes.clearWaitingFor() and send
//                  CM_ADD(CommitNew) to the QMGR of the starting node;
//      CommitNew : call handleArbitNdbAdd(); when the node is in
//                  c_start.m_starting_nodes it is cleared from that set,
//                  and sendSttorryLab() appears after the isclear() check.
// NOTE(review): returns after the warnings / the done() check and the
// construction of `rg` are not visible in this excerpt.
2270 void Qmgr::execCM_ACKADD(
Signal* signal)
2272 NodeRecPtr addNodePtr;
2273 NodeRecPtr senderNodePtr;
2277 const CmAdd::RequestType type = (CmAdd::RequestType)cmAckAdd->requestType;
2278 addNodePtr.i = cmAckAdd->startingNodeId;
2279 senderNodePtr.i = cmAckAdd->senderNodeId;
2281 DEBUG_START3(signal, type);
2283 if (cpresident != getOwnNodeId()) {
2288 warningEvent(
"Received CM_ACKADD from %d president=%d",
2289 senderNodePtr.i, cpresident);
2293 if (addNodePtr.i != c_start.m_startNode) {
2298 warningEvent(
"Received CM_ACKADD from %d with startNode=%d != own %d",
2299 senderNodePtr.i, addNodePtr.i, c_start.m_startNode);
2303 ndbrequire(c_start.m_gsn == GSN_CM_ADD);
2304 c_start.m_nodes.clearWaitingFor(senderNodePtr.i);
2305 if(!c_start.m_nodes.done()){
2311 case CmAdd::Prepare:{
2317 c_start.m_gsn = GSN_CM_ADD;
2318 c_start.m_nodes = c_clusterNodes;
2320 CmAdd *
const cmAdd = (
CmAdd*)signal->getDataPtrSend();
2321 cmAdd->requestType = CmAdd::AddCommit;
2322 cmAdd->startingNodeId = addNodePtr.i;
2326 sendSignal(rg, GSN_CM_ADD, signal, CmAdd::SignalLength, JBA);
2327 DEBUG_START2(GSN_CM_ADD, rg,
"AddCommit");
2330 case CmAdd::AddCommit:{
2337 c_start.m_gsn = GSN_CM_ADD;
2338 c_start.m_nodes.clearWaitingFor();
2341 CmAdd *
const cmAdd = (
CmAdd*)signal->getDataPtrSend();
2342 cmAdd->requestType = CmAdd::CommitNew;
2343 cmAdd->startingNodeId = addNodePtr.i;
2346 sendSignal(calcQmgrBlockRef(addNodePtr.i), GSN_CM_ADD, signal,
2347 CmAdd::SignalLength, JBA);
2348 DEBUG_START(GSN_CM_ADD, addNodePtr.i,
"CommitNew");
2351 case CmAdd::CommitNew:
2356 handleArbitNdbAdd(signal, addNodePtr.i);
2359 if (c_start.m_starting_nodes.
get(addNodePtr.i))
2362 c_start.m_starting_nodes.
clear(addNodePtr.i);
2363 if (c_start.m_starting_nodes.
isclear())
2366 sendSttorryLab(signal);
// Recomputes the left and right neighbours (cneighbourl / cneighbourh)
// from the dynamic ids of all other nodes in phase ZRUNNING.
//   signal - signal buffer, reused for the event report
//   from   - caller tag; callers in this file pass __LINE__
// While scanning, the code tracks the smallest and largest dynamic id seen
// (tfnMinFound / tfnMaxFound), the largest id below the own id
// (tfnLeftFound) and a candidate for the smallest id on the other side
// (tfnRightFound). When no left candidate exists the left neighbour is
// taken from tfnMaxFound; when no right candidate exists the right
// neighbour is taken from tfnMinFound. A change of left neighbour is
// handled after the recomputation, and an EVENT_REP plus an info log line
// with the old and new neighbours are emitted.
// NOTE(review): the initial values of tfnLeftFound / tfnMaxFound and the
// "no other node" branches are not visible in this excerpt; presumably they
// start at 0 and yield ZNIL - confirm against the full source.
2378 void Qmgr::findNeighbours(
Signal* signal, Uint32 from)
2380 UintR toldLeftNeighbour;
2384 UintR tfnRightFound;
2385 NodeRecPtr fnNodePtr;
2386 NodeRecPtr fnOwnNodePtr;
// Remember the previous neighbours so changes can be detected below.
2388 Uint32 toldRightNeighbour = cneighbourh;
2389 toldLeftNeighbour = cneighbourl;
2392 tfnMinFound = (UintR)-1;
2393 tfnRightFound = (UintR)-1;
2394 fnOwnNodePtr.i = getOwnNodeId();
2395 ptrCheckGuard(fnOwnNodePtr, MAX_NDB_NODES, nodeRec);
2396 for (fnNodePtr.i = 1; fnNodePtr.i < MAX_NDB_NODES; fnNodePtr.i++) {
2398 ptrAss(fnNodePtr, nodeRec);
2399 if (fnNodePtr.i != fnOwnNodePtr.i) {
2400 if (fnNodePtr.p->phase == ZRUNNING) {
2401 if (tfnMinFound > fnNodePtr.p->ndynamicId) {
2403 tfnMinFound = fnNodePtr.p->ndynamicId;
2405 if (tfnMaxFound < fnNodePtr.p->ndynamicId) {
2407 tfnMaxFound = fnNodePtr.p->ndynamicId;
2409 if (fnOwnNodePtr.p->ndynamicId > fnNodePtr.p->ndynamicId) {
2411 if (fnNodePtr.p->ndynamicId > tfnLeftFound) {
2413 tfnLeftFound = fnNodePtr.p->ndynamicId;
2417 if (fnNodePtr.p->ndynamicId < tfnRightFound) {
2419 tfnRightFound = fnNodePtr.p->ndynamicId;
2425 if (tfnLeftFound == 0) {
2426 if (tfnMinFound == (UintR)-1) {
2431 cneighbourl = translateDynamicIdToNodeId(signal, tfnMaxFound);
2435 cneighbourl = translateDynamicIdToNodeId(signal, tfnLeftFound);
2437 if (tfnRightFound == (UintR)-1) {
2438 if (tfnMaxFound == 0) {
2443 cneighbourh = translateDynamicIdToNodeId(signal, tfnMinFound);
2447 cneighbourh = translateDynamicIdToNodeId(signal, tfnRightFound);
2449 if (toldLeftNeighbour != cneighbourl) {
2451 if (cneighbourl != ZNIL) {
2457 fnNodePtr.i = cneighbourl;
2458 ptrCheckGuard(fnNodePtr, MAX_NDB_NODES, nodeRec);
2464 signal->theData[1] = getOwnNodeId();
2465 signal->theData[2] = cneighbourl;
2466 signal->theData[3] = cneighbourh;
2467 signal->theData[4] = fnOwnNodePtr.p->ndynamicId;
2469 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, Tlen, JBB);
2470 g_eventLogger->
info(
"findNeighbours from: %u old (left: %u right: %u) new (%u %u)",
// Initialises QMGR state from the node and cluster configuration.
// Visible steps:
//  - arm interface_check_timer with a delay of 1000 and zero arbitRec;
//  - set local / member starting values, then overwrite them with the
//    corresponding configuration parameters via ndb_mgm_get_int_parameter();
//  - map a configured restart timeout of 0 to ~0 for each of the four
//    restart timeouts;
//  - apply the connectivity-check delay and arbitration timeout, set the
//    arbitration method and reset arbitration state;
//  - for rank 1 and 2, collect the nodes whose CFG_NODE_ARBIT_RANK matches
//    into sd->mask and pass it on through execARBIT_CFG();
//  - report "Arbitration disabled" and switch the method to DISABLED under
//    the METHOD_DEFAULT check;
//  - copy each node's CFG_DB_HB_ORDER into its node record and abort with
//    NDBD_EXIT_INVALID_CONFIG when check_hb_order_config() returns -1 or -2.
// NOTE(review): presumably the starting values survive when a parameter
// lookup fails, and the second half of the "arbitration disabled"
// condition is not visible - confirm against the full source.
2482 void Qmgr::initData(
Signal* signal)
2484 NDB_TICKS now = NdbTick_CurrentMillisecond();
2485 interface_check_timer.
setDelay(1000);
2486 interface_check_timer.
reset(now);
2489 memset(&arbitRec, 0,
sizeof(arbitRec));
2495 m_ctx.m_config.getOwnConfigIterator();
// Starting values, set before the configuration is consulted.
2498 Uint32 hbDBDB = 1500;
2499 Uint32 arbitTimeout = 1000;
2500 Uint32 arbitMethod = ARBIT_METHOD_DEFAULT;
2501 Uint32 ccInterval = 0;
2502 c_restartPartialTimeout = 30000;
2503 c_restartPartionedTimeout = 60000;
2504 c_restartFailureTimeout = ~0;
2505 c_restartNoNodegroupTimeout = 15000;
2506 ndb_mgm_get_int_parameter(p, CFG_DB_HEARTBEAT_INTERVAL, &hbDBDB);
2507 ndb_mgm_get_int_parameter(p, CFG_DB_ARBIT_TIMEOUT, &arbitTimeout);
2508 ndb_mgm_get_int_parameter(p, CFG_DB_ARBIT_METHOD, &arbitMethod);
2509 ndb_mgm_get_int_parameter(p, CFG_DB_START_PARTIAL_TIMEOUT,
2510 &c_restartPartialTimeout);
2511 ndb_mgm_get_int_parameter(p, CFG_DB_START_PARTITION_TIMEOUT,
2512 &c_restartPartionedTimeout);
2513 ndb_mgm_get_int_parameter(p, CFG_DB_START_NO_NODEGROUP_TIMEOUT,
2514 &c_restartNoNodegroupTimeout);
2515 ndb_mgm_get_int_parameter(p, CFG_DB_START_FAILURE_TIMEOUT,
2516 &c_restartFailureTimeout);
2517 ndb_mgm_get_int_parameter(p, CFG_DB_CONNECT_CHECK_DELAY,
// A timeout of 0 is replaced by ~0 (all bits set).
2520 if(c_restartPartialTimeout == 0)
2522 c_restartPartialTimeout = ~0;
2525 if (c_restartPartionedTimeout ==0)
2527 c_restartPartionedTimeout = ~0;
2530 if (c_restartFailureTimeout == 0)
2532 c_restartFailureTimeout = ~0;
2535 if (c_restartNoNodegroupTimeout == 0)
2537 c_restartNoNodegroupTimeout = ~0;
2541 setCCDelay(ccInterval);
2542 setArbitTimeout(arbitTimeout);
2544 arbitRec.method = (ArbitRec::Method)arbitMethod;
2545 arbitRec.state = ARBIT_NULL;
2546 arbitRec.apiMask[0].
clear();
// Build one arbitrator candidate mask per rank (1 and 2).
2550 for (
unsigned rank = 1; rank <= 2; rank++) {
2551 sd->sender = getOwnNodeId();
2557 m_ctx.m_config.getClusterConfigIterator();
2558 for (ndb_mgm_first(iter); ndb_mgm_valid(iter); ndb_mgm_next(iter)) {
2560 if (ndb_mgm_get_int_parameter(iter, CFG_NODE_ARBIT_RANK, &tmp) == 0 &&
2563 ndbrequire(!ndb_mgm_get_int_parameter(iter, CFG_NODE_ID, &nodeId));
2564 sd->mask.
set(nodeId);
2567 sum += sd->mask.
count();
2568 execARBIT_CFG(signal);
2571 if (arbitRec.method == ArbitRec::METHOD_DEFAULT &&
2575 infoEvent(
"Arbitration disabled, all API nodes have rank 0");
2576 arbitRec.method = ArbitRec::DISABLED;
// Load the per-node heartbeat order from the cluster configuration.
2582 m_ctx.m_config.getClusterConfigIterator();
2583 for (ndb_mgm_first(iter); ndb_mgm_valid(iter); ndb_mgm_next(iter))
2587 if (ndb_mgm_get_int_parameter(iter, CFG_NODE_ID, &nodeId) == 0)
2593 ndb_mgm_get_int_parameter(iter, CFG_DB_HB_ORDER, &hbOrder);
2597 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
2598 nodePtr.p->hbOrder = hbOrder;
2602 int hb_order_error = check_hb_order_config();
2603 if (hb_order_error == -1)
2605 char msg[] =
"Illegal HeartbeatOrder config, "
2606 "all nodes must have non-zero config value";
2607 progError(__LINE__, NDBD_EXIT_INVALID_CONFIG, msg);
2610 if (hb_order_error == -2)
2612 char msg[] =
"Illegal HeartbeatOrder config, "
2613 "the nodes must have distinct config values";
2614 progError(__LINE__, NDBD_EXIT_INVALID_CONFIG, msg);
2617 ndbrequire(hb_order_error == 0);
// Periodic timer tick, driven by a delayed CONTINUEB(ZTIMER_HANDLING) that
// this function re-sends to QMGR at its end with a delay of 10.
// theData[1..2] of the incoming signal hold the high/low 32 bits of the
// time at which the previous tick was scheduled.
// Visible steps:
//  - log a warning when the tick arrives >= 1000 after it was scheduled or
//    the clock appears to have gone backwards, and an info line when it
//    arrives >= 150 late;
//  - when the own node is ZRUNNING: send a heartbeat when hb_send_timer
//    fires; run checkHeartbeat() on hb_check_timer when no connectivity
//    check is active, and checkConnectivityTimeSignal() on
//    m_connectivity_check.m_timer;
//  - run checkStartInterface() and apiHbHandlingLab() when their timers fire;
//  - when cactivateApiCheck is non-zero, send one TIME_SIGNAL to DBTC and
//    DBLQH per elapsed step of 10 since clatestTransactionCheck.
// NOTE(review): the time unit is presumably milliseconds (the clock source
// is NdbTick_CurrentMillisecond) - confirm.
2627 void Qmgr::timerHandlingLab(
Signal* signal)
2629 NDB_TICKS TcurrentTime = NdbTick_CurrentMillisecond();
2630 NodeRecPtr myNodePtr;
2631 myNodePtr.i = getOwnNodeId();
2632 ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
// Reassemble the 64-bit scheduling time from its two 32-bit halves.
2634 Uint32 sentHi = signal->theData[1];
2635 Uint32 sentLo = signal->theData[2];
2636 Uint64 sent = (Uint64(sentHi) << 32) + sentLo;
2638 if (TcurrentTime >= sent + 1000 || (TcurrentTime < sent))
2641 g_eventLogger->
warning(
"timerHandlingLab now: %llu sent: %llu diff: %d",
2642 TcurrentTime, sent,
int(TcurrentTime - sent));
2644 else if (TcurrentTime >= sent + 150)
2646 g_eventLogger->
info(
"timerHandlingLab now: %llu sent: %llu diff: %d",
2647 TcurrentTime, sent,
int(TcurrentTime - sent));
2650 if (myNodePtr.p->phase == ZRUNNING) {
2655 if (hb_send_timer.
check(TcurrentTime)) {
2657 sendHeartbeat(signal);
2658 hb_send_timer.
reset(TcurrentTime);
2660 if (likely(! m_connectivity_check.m_active))
2662 if (hb_check_timer.
check(TcurrentTime)) {
2664 checkHeartbeat(signal);
2665 hb_check_timer.
reset(TcurrentTime);
2671 if (m_connectivity_check.m_timer.
check(TcurrentTime)) {
2673 checkConnectivityTimeSignal(signal);
2674 m_connectivity_check.m_timer.
reset(TcurrentTime);
2679 if (interface_check_timer.
check(TcurrentTime)) {
2681 interface_check_timer.
reset(TcurrentTime);
2682 checkStartInterface(signal, TcurrentTime);
2685 if (hb_api_timer.
check(TcurrentTime))
2688 hb_api_timer.
reset(TcurrentTime);
2689 apiHbHandlingLab(signal, TcurrentTime);
2692 if (cactivateApiCheck != 0) {
2694 if (clatestTransactionCheck == 0) {
2698 clatestTransactionCheck = TcurrentTime;
// Catch up: one TIME_SIGNAL pair per elapsed step of 10.
2701 while (TcurrentTime > ((NDB_TICKS)10 + clatestTransactionCheck)) {
2703 clatestTransactionCheck += (NDB_TICKS)10;
2704 sendSignal(DBTC_REF, GSN_TIME_SIGNAL, signal, 1, JBB);
2705 sendSignal(DBLQH_REF, GSN_TIME_SIGNAL, signal, 1, JBB);
// Schedule the next tick, carrying the current time split in two words.
2719 signal->theData[0] = ZTIMER_HANDLING;
2720 signal->theData[1] = Uint32(TcurrentTime >> 32);
2721 signal->theData[2] = Uint32(TcurrentTime);
2722 sendSignalWithDelay(QMGR_REF, GSN_CONTINUEB, signal, 10, 3);
// Sends a CM_HEARTBEAT carrying the own node id to the node stored in
// cneighbourh, using that node's recorded blockRef, and reports the send
// through an EVENT_REP to CMVMI (theData[1] = receiving node).
// A cneighbourh of ZNIL is special-cased first.
// NOTE(review): the ZNIL branch body (presumably an early return) and the
// event type written to theData[0] are not visible in this excerpt.
2729 void Qmgr::sendHeartbeat(
Signal* signal)
2731 NodeRecPtr localNodePtr;
2732 localNodePtr.i = cneighbourh;
2733 if (localNodePtr.i == ZNIL) {
2741 ptrCheckGuard(localNodePtr, MAX_NDB_NODES, nodeRec);
2742 signal->theData[0] = getOwnNodeId();
2744 sendSignal(localNodePtr.p->blockRef, GSN_CM_HEARTBEAT, signal, 1, JBA);
2747 signal->theData[1] = localNodePtr.i;
2748 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
// Checks the heartbeat state of the node stored in cneighbourl.
// The neighbour must be in phase ZRUNNING. Visible outcomes: an EVENT_REP
// of length 3 naming the node; when the connectivity check is enabled,
// startConnectivityCheck() with cause ZHEARTBEAT_FAILURE; otherwise an
// EVENT_REP of length 2 followed by failReportLab() with cause
// ZHEARTBEAT_FAILURE and this node as the source.
// NOTE(review): the missed-heartbeat counting and the thresholds that
// select between these outcomes are not visible in this excerpt.
2752 void Qmgr::checkHeartbeat(
Signal* signal)
2756 nodePtr.i = cneighbourl;
2757 if (nodePtr.i == ZNIL) {
2765 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
2768 ndbrequire(nodePtr.p->phase == ZRUNNING);
2773 signal->theData[1] = nodePtr.i;
2775 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB);
2780 if (m_connectivity_check.getEnabled())
2784 startConnectivityCheck(signal, FailRep::ZHEARTBEAT_FAILURE, nodePtr.i);
2794 signal->theData[1] = nodePtr.i;
2795 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
2797 failReportLab(signal, nodePtr.i, FailRep::ZHEARTBEAT_FAILURE, getOwnNodeId());
// Periodic pass over node ids 1..MAX_NODES-1 for API heartbeat handling.
//   signal - signal buffer, reused for event reports
//   now    - current time as passed by timerHandlingLab()
// Visible actions: for nodes present in c_connectedNodes, EVENT_REP signals
// of length 3 and 2 naming the node are sent and api_failed() is called;
// for a node in phase ZAPI_INACTIVE with a non-zero m_secret whose
// m_alloc_timeout has passed, m_secret is reset to 0 and a warning about
// releasing the node id allocation is logged.
// NOTE(review): the node-type filter and the heartbeat counters that decide
// when the reports / api_failed() fire are not visible in this excerpt.
2803 void Qmgr::apiHbHandlingLab(
Signal* signal, Uint64 now)
2805 NodeRecPtr TnodePtr;
2807 for (TnodePtr.i = 1; TnodePtr.i < MAX_NODES; TnodePtr.i++) {
2808 const Uint32 nodeId = TnodePtr.i;
2809 ptrAss(TnodePtr, nodeRec);
2818 if (c_connectedNodes.
get(nodeId))
2826 signal->theData[1] = nodeId;
2828 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB);
2842 signal->theData[1] = nodeId;
2843 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
2845 api_failed(signal, nodeId);
2848 else if (TnodePtr.p->phase == ZAPI_INACTIVE &&
2849 TnodePtr.p->m_secret != 0 && now > TnodePtr.p->m_alloc_timeout)
2852 TnodePtr.p->m_secret = 0;
2853 warningEvent(
"Releasing node id allocation for node %u",
// Periodic pass over node ids 1..MAX_NODES-1, called from
// timerHandlingLab() when interface_check_timer fires.
//   signal - signal buffer, reused for outgoing signals
//   now    - current time as passed by the caller
// Visible actions for nodes in phase ZFAIL_CLOSING: reset failState to
// NORMAL and m_secret to 0, move the node to ZINIT or ZAPI_INACTIVE, and
// send OPEN_COMREQ for it to CMVMI; log "Failure handling ... has not
// completed" messages for data nodes and API nodes, send DUMP_STATE_ORD
// 7019 to DBDIH, and list the blocks still awaited when the fail state is
// WAITING_FOR_API_FAILCONF.
// Separately, a DB node in ZINIT with a non-zero m_secret whose
// m_alloc_timeout has passed gets m_secret reset and a warning logged.
// NOTE(review): the counters / conditions selecting between these actions
// are largely missing from this excerpt.
2860 void Qmgr::checkStartInterface(
Signal* signal, Uint64 now)
2868 for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++) {
2869 ptrAss(nodePtr, nodeRec);
2871 if (nodePtr.p->phase == ZFAIL_CLOSING) {
2874 if (c_connectedNodes.
get(nodePtr.i)){
2883 && (nodePtr.p->failState == NORMAL)) {
2890 nodePtr.p->failState = NORMAL;
2891 nodePtr.p->m_secret = 0;
2895 nodePtr.p->phase = ZINIT;
2899 nodePtr.p->phase = ZAPI_INACTIVE;
2903 if (c_allow_api_connect)
2906 nodePtr.p->phase = ZAPI_INACTIVE;
2921 signal->theData[0] = 0;
2922 signal->theData[1] = nodePtr.i;
2923 sendSignal(CMVMI_REF, GSN_OPEN_COMREQ, signal, 2, JBA);
2936 "Failure handling of node %d has not completed"
2937 " in %d min - state = %d",
2940 nodePtr.p->failState);
2948 signal->theData[0] = 7019;
2949 signal->theData[1] = nodePtr.i;
2950 sendSignal(DBDIH_REF, GSN_DUMP_STATE_ORD, signal, 2, JBB);
2957 "Failure handling of api %u has not completed"
2958 " in %d min - state = %d",
2961 nodePtr.p->failState);
2963 if (nodePtr.p->failState == WAITING_FOR_API_FAILCONF)
// The message below prints exactly five entries, hence the size assert.
2966 compile_time_assert(NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks) == 5);
2968 " Waiting for blocks: %u %u %u %u %u",
2969 nodePtr.p->m_failconf_blocks[0],
2970 nodePtr.p->m_failconf_blocks[1],
2971 nodePtr.p->m_failconf_blocks[2],
2972 nodePtr.p->m_failconf_blocks[3],
2973 nodePtr.p->m_failconf_blocks[4]);
2980 else if (type ==
NodeInfo::DB && nodePtr.p->phase == ZINIT &&
2981 nodePtr.p->m_secret != 0 && now > nodePtr.p->m_alloc_timeout)
2984 nodePtr.p->m_secret = 0;
2985 warningEvent(
"Releasing node id allocation for node %u",
// Distributes API_FAILREQ for a failed API node to local kernel blocks.
//   signal       - signal buffer
//   failedNodeNo - id of the failed node
//   sumaOnly     - NOTE(review): its use is not visible in this excerpt;
//                  presumably it limits the send to SUMA - confirm.
// Visible steps: the payload (failed node id, QMGR_REF) is appended to a
// section, a RouteOrd with gsn = GSN_API_FAILREQ is prepared, the node's
// failState becomes WAITING_FOR_API_FAILCONF, and DBTC, DBDICT, DBSPJ and
// SUMA are each registered with add_failconf_block() and set as the
// RouteOrd destination; a ROUTE_ORD send to CMVMI is visible for SUMA.
2996 void Qmgr::sendApiFailReq(
Signal* signal, Uint16 failedNodeNo,
bool sumaOnly)
2999 signal->theData[0] = failedNodeNo;
3000 signal->theData[1] = QMGR_REF;
3008 Uint32 routedSignalSectionI = RNIL;
3009 ndbrequire(appendToSection(routedSignalSectionI,
3010 &signal->theData[0],
3016 routeOrd->srcRef = reference();
3017 routeOrd->gsn = GSN_API_FAILREQ;
3019 NodeRecPtr failedNodePtr;
3020 failedNodePtr.i = failedNodeNo;
3021 ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
3022 failedNodePtr.p->failState = WAITING_FOR_API_FAILCONF;
3035 add_failconf_block(failedNodePtr, DBTC);
3036 routeOrd->dstRef = DBTC_REF;
3038 RouteOrd::SignalLength,
3041 add_failconf_block(failedNodePtr, DBDICT);
3042 routeOrd->dstRef = DBDICT_REF;
3044 RouteOrd::SignalLength,
3047 add_failconf_block(failedNodePtr, DBSPJ);
3048 routeOrd->dstRef = DBSPJ_REF;
3050 RouteOrd::SignalLength,
3055 add_failconf_block(failedNodePtr, SUMA);
3056 routeOrd->dstRef = SUMA_REF;
3057 sendSignal(CMVMI_REF, GSN_ROUTE_ORD, signal,
3058 RouteOrd::SignalLength,
// Handles an incoming API_FAILREQ: theData[0] is the failed node id, which
// is range-checked against MAX_NODES and then passed to api_failed().
// NOTE(review): checks between the guard and the call are not visible here.
3062 void Qmgr::execAPI_FAILREQ(
Signal* signal)
3065 NodeRecPtr failedNodePtr;
3066 failedNodePtr.i = signal->theData[0];
3068 ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
3072 api_failed(signal, signal->theData[0]);
// Handles an incoming API_FAILCONF from a block.
// theData[0] is the failed node id and theData[1] the confirming block's
// reference (reduced to its main block number).
// When the node is not in WAITING_FOR_API_FAILCONF, or the block cannot be
// removed from the node's m_failconf_blocks list, the state and the
// remaining list are printed and systemErrorLab() is called.
// Once the list is empty the node's failState returns to NORMAL.
3075 void Qmgr::execAPI_FAILCONF(
Signal* signal)
3077 NodeRecPtr failedNodePtr;
3080 failedNodePtr.i = signal->theData[0];
3081 ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
3083 Uint32
block = refToMain(signal->theData[1]);
3084 if (failedNodePtr.p->failState != WAITING_FOR_API_FAILCONF ||
3085 !remove_failconf_block(failedNodePtr, block))
3088 ndbout <<
"execAPI_FAILCONF from " << block
3089 <<
" failedNodePtr.p->failState = "
3090 << (Uint32)(failedNodePtr.p->failState)
3092 for (Uint32 i = 0;i<NDB_ARRAY_SIZE(failedNodePtr.p->m_failconf_blocks);i++)
3094 printf(
"%u ", failedNodePtr.p->m_failconf_blocks[i]);
3097 systemErrorLab(signal, __LINE__);
3100 if (is_empty_failconf_block(failedNodePtr))
3103 failedNodePtr.p->failState = NORMAL;
// Records `block` in the node's m_failconf_blocks list (the blocks from
// which an API_FAILCONF is still awaited).
// The list is scanned for the first entry that is either 0 (free) or
// already equal to `block`. Running off the end of the array is fatal
// (ndbrequire). Finding the block already present trips an ndbassert and
// is otherwise handled by a separate branch; in the normal case the free
// slot is required to be 0 and receives `block`.
// NOTE(review): the loop exits and the duplicate branch body are not
// visible in this excerpt.
3114 Qmgr::add_failconf_block(NodeRecPtr nodePtr, Uint32 block)
3118 for (; pos < NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks); pos++)
3121 if (nodePtr.p->m_failconf_blocks[pos] == 0)
3126 else if (nodePtr.p->m_failconf_blocks[pos] == block)
3133 ndbrequire(pos != NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks));
3134 ndbassert(nodePtr.p->m_failconf_blocks[pos] != block);
3135 if (nodePtr.p->m_failconf_blocks[pos] == block)
3146 ndbrequire(nodePtr.p->m_failconf_blocks[pos] == 0);
3147 nodePtr.p->m_failconf_blocks[pos] =
block;
// Removes `block` from the node's m_failconf_blocks list.
// The list is scanned for the first entry that is 0 or equal to `block`.
// When the scan ends without landing on `block`, the "not found" branch is
// taken; execAPI_FAILCONF treats a false result from this function as an
// error. Otherwise the entry is zeroed and every later entry is copied one
// position towards the front.
// NOTE(review): the return statements are not visible in this excerpt, and
// after the shift the last slot keeps its previous value unless a missing
// line clears it - confirm against the full source.
3151 Qmgr::remove_failconf_block(NodeRecPtr nodePtr, Uint32 block)
3155 for (; pos < NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks); pos++)
3158 if (nodePtr.p->m_failconf_blocks[pos] == 0)
3163 else if (nodePtr.p->m_failconf_blocks[pos] == block)
3170 if (pos == NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks) ||
3171 nodePtr.p->m_failconf_blocks[pos] != block)
3180 nodePtr.p->m_failconf_blocks[pos] = 0;
3181 for (pos++; pos < NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks); pos++)
3184 nodePtr.p->m_failconf_blocks[pos - 1] = nodePtr.p->m_failconf_blocks[pos];
// Returns true when the first entry of the node's m_failconf_blocks list
// is 0. The first slot alone is inspected; the add/remove helpers fill the
// first free slot and shift entries towards the front on removal.
3191 Qmgr::is_empty_failconf_block(NodeRecPtr nodePtr)
const
3193 return nodePtr.p->m_failconf_blocks[0] == 0;
// Handles an incoming NDB_FAILCONF; theData[0] is the failed node id.
// Visible steps: with error insert 930 active the insert is cleared and the
// signal is reported as discarded; a node in WAITING_FOR_NDB_FAILCONF goes
// back to failState NORMAL, while any other state is logged and leads to
// systemErrorLab(); a president-only section contains CRASH_INSERTION(936);
// finally an NFCompleteRep with nodeId = own node id is sent as
// NF_COMPLETEREP to every node in phase ZAPI_ACTIVE.
// NOTE(review): the remaining NFCompleteRep fields and the contents of the
// president-only section are not visible in this excerpt.
3196 void Qmgr::execNDB_FAILCONF(
Signal* signal)
3198 NodeRecPtr failedNodePtr;
3202 failedNodePtr.i = signal->theData[0];
3204 if (ERROR_INSERTED(930))
3206 CLEAR_ERROR_INSERT_VALUE;
3207 infoEvent(
"Discarding NDB_FAILCONF for %u", failedNodePtr.i);
3211 ptrCheckGuard(failedNodePtr, MAX_NDB_NODES, nodeRec);
3212 if (failedNodePtr.p->failState == WAITING_FOR_NDB_FAILCONF){
3213 failedNodePtr.p->failState = NORMAL;
3219 "Received NDB_FAILCONF for node %u with state: %d %d",
3221 failedNodePtr.p->phase,
3222 failedNodePtr.p->failState);
3224 systemErrorLab(signal, __LINE__);
3227 if (cpresident == getOwnNodeId())
3231 CRASH_INSERTION(936);
3244 nfComp->
nodeId = getOwnNodeId();
3247 for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++)
3250 ptrAss(nodePtr, nodeRec);
3251 if (nodePtr.p->phase == ZAPI_ACTIVE){
3253 sendSignal(nodePtr.p->blockRef, GSN_NF_COMPLETEREP, signal,
3254 NFCompleteRep::SignalLength, JBB);
// Handles an incoming NF_COMPLETEREP.
// Visible behaviour: loops over node ids 1..MAX_NODES-1 and sends
// TAKE_OVERTCCONF (with NFCompleteRep::SignalLength words) to the blockRef
// of nodes in phase ZAPI_ACTIVE that also satisfy a second condition.
// NOTE(review): the second half of the condition and any payload
// preparation are not visible in this excerpt.
3261 Qmgr::execNF_COMPLETEREP(
Signal* signal)
3280 for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++)
3283 ptrAss(nodePtr, nodeRec);
3284 if (nodePtr.p->phase == ZAPI_ACTIVE &&
3288 sendSignal(nodePtr.p->blockRef, GSN_TAKE_OVERTCCONF, signal,
3289 NFCompleteRep::SignalLength, JBB);
// Forward declaration; used below to turn a connection error code into
// text for progError().
3298 const char *lookupConnectionError(Uint32 err);
// Handles DISCONNECT_REP for node rep->nodeId with error code rep->err.
// The node is removed from c_connectedNodes first. Visible outcomes after
// that: progError(NDBD_EXIT_SR_OTHERNODEFAILED) with a prepared message
// (preceded by crash-insert points 932 and 938); api_failed() for the
// node; a switch on the own node's phase that can end in
// progError(NDBD_EXIT_CONNECTION_SETUP_FAILED) or
// progError(NDBD_EXIT_SR_OTHERNODEFAILED); and finally node_failed().
// NOTE(review): the conditions selecting between these outcomes (node
// type, start state, case labels) are not visible in this excerpt.
3300 void Qmgr::execDISCONNECT_REP(
Signal* signal)
3304 const Uint32 nodeId = rep->nodeId;
3305 const Uint32 err = rep->err;
3307 c_connectedNodes.
clear(nodeId);
3315 nodePtr.i = getOwnNodeId();
3316 ptrCheckGuard(nodePtr, MAX_NODES, nodeRec);
3323 CRASH_INSERTION(932);
3324 CRASH_INSERTION(938);
3326 progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
3333 api_failed(signal, nodeId);
3337 switch(nodePtr.p->phase){
3344 progError(__LINE__, NDBD_EXIT_CONNECTION_SETUP_FAILED,
3345 lookupConnectionError(err));
3356 progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
3360 node_failed(signal, nodeId);
// Handles the failure of node aFailedNode after a disconnect.
// The node's m_secret is reset, then the function switches on the node's
// phase. Visible branch actions: report a ZLINK_FAILURE through
// failReportLab(); set the phase to ZRUNNING and then report
// ZLINK_FAILURE; or set failState NORMAL / phase ZFAIL_CLOSING and send a
// CLOSE_COMREQ (RT_NO_REPLY, failNo 0, one node) for the node to CMVMI.
// NOTE(review): the case labels are not visible in this excerpt, so the
// phase that maps to each action cannot be stated here.
3363 void Qmgr::node_failed(
Signal* signal, Uint16 aFailedNode)
3365 NodeRecPtr failedNodePtr;
3370 failedNodePtr.i = aFailedNode;
3371 ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
3372 failedNodePtr.p->m_secret = 0;
3380 switch(failedNodePtr.p->phase){
3383 failReportLab(signal, aFailedNode, FailRep::ZLINK_FAILURE, getOwnNodeId());
3393 failedNodePtr.p->phase = ZRUNNING;
3394 failReportLab(signal, aFailedNode, FailRep::ZLINK_FAILURE, getOwnNodeId());
3403 failedNodePtr.p->failState = NORMAL;
3404 failedNodePtr.p->phase = ZFAIL_CLOSING;
3410 closeCom->xxxBlockRef = reference();
3411 closeCom->requestType = CloseComReqConf::RT_NO_REPLY;
3412 closeCom->failNo = 0;
3413 closeCom->noOfNodes = 1;
3414 NodeBitmask::clear(closeCom->theNodes);
3415 NodeBitmask::set(closeCom->theNodes, failedNodePtr.i);
3416 sendSignal(CMVMI_REF, GSN_CLOSE_COMREQ, signal,
3417 CloseComReqConf::SignalLength, JBA);
// Handles UPGRADE_PROTOCOL_ORD. For UPO_ENABLE_MICRO_GCP the member
// m_micro_gcp_enabled is set to true (execAPI_REGREQ consults that flag
// when checking API versions).
// NOTE(review): the switch expression and other cases are not visible here.
3423 Qmgr::execUPGRADE_PROTOCOL_ORD(
Signal* signal)
3427 case UpgradeProtocolOrd::UPO_ENABLE_MICRO_GCP:
3429 m_micro_gcp_enabled =
true;
// Starts failure handling for API node `nodeId`.
// Visible steps: reset m_secret; special handling when the node is already
// in ZFAIL_CLOSING (failState must be NORMAL; an API_FAILREQ carrying the
// node id and QMGR_REF is sent to the receiver group `rg`); otherwise pick
// the initial fail state from the current phase
// (WAITING_FOR_CLOSECOMCONF_ACTIVE when ZAPI_ACTIVE, else
// WAITING_FOR_CLOSECOMCONF_NOTACTIVE), move the node to ZFAIL_CLOSING,
// zero its recorded version, and send CLOSE_COMREQ (RT_API_FAILURE,
// failNo 0, one node) to CMVMI.
// NOTE(review): the exact nesting of the ZFAIL_CLOSING branch and how `rg`
// is built are not visible in this excerpt.
3435 Qmgr::api_failed(
Signal* signal, Uint32 nodeId)
3437 NodeRecPtr failedNodePtr;
3442 failedNodePtr.i = nodeId;
3443 ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
3444 failedNodePtr.p->m_secret = 0;
3446 if (failedNodePtr.p->phase == ZFAIL_CLOSING)
3455 ndbrequire(failedNodePtr.p->failState == NORMAL);
3462 signal->theData[0] = failedNodePtr.i;
3463 signal->theData[1] = QMGR_REF;
3465 sendSignal(rg, GSN_API_FAILREQ, signal, 2, JBA);
// The fail state must be chosen before the phase is overwritten below.
3468 FailState initialState = (failedNodePtr.p->phase == ZAPI_ACTIVE) ?
3469 WAITING_FOR_CLOSECOMCONF_ACTIVE :
3470 WAITING_FOR_CLOSECOMCONF_NOTACTIVE;
3472 failedNodePtr.p->failState = initialState;
3473 failedNodePtr.p->phase = ZFAIL_CLOSING;
3475 setNodeInfo(failedNodePtr.i).
m_version = 0;
3479 closeCom->xxxBlockRef = reference();
3480 closeCom->requestType = CloseComReqConf::RT_API_FAILURE;
3481 closeCom->failNo = 0;
3482 closeCom->noOfNodes = 1;
3483 NodeBitmask::clear(closeCom->theNodes);
3484 NodeBitmask::set(closeCom->theNodes, failedNodePtr.i);
3485 sendSignal(CMVMI_REF, GSN_CLOSE_COMREQ, signal,
3486 CloseComReqConf::SignalLength, JBA);
// Handles API_REGREQ, the registration request from an API or management
// node.
// Visible steps:
//  - read version, sender ref and mysql_version from the request; the node
//    id is derived from the sender ref;
//  - special handling when the node is in ZFAIL_CLOSING;
//  - compatibility check: fails outright when micro GCP is enabled and the
//    sender's version does not support it; otherwise
//    ndbCompatible_ndb_api() or ndbCompatible_ndb_mgmt() is used, and other
//    node types get ApiRegRef::WrongType;
//  - on incompatibility: log the attempt, set the node to ZAPI_INACTIVE and
//    reply with ApiRegRef::UnsupportedVersion;
//  - otherwise record the version; for a node in ZAPI_INACTIVE store its
//    blockRef and, under a start-phase condition, mark it ZAPI_ACTIVE and
//    ask CMVMI to enable communication (tag ENABLE_COM_API_REGREQ);
//  - sendApiRegConf() replies to the node.
// NOTE(review): early returns and the node-type switch are not visible in
// this excerpt.
3496 void Qmgr::execAPI_REGREQ(
Signal* signal)
3501 const Uint32 version = req->version;
3502 const BlockReference ref = req->ref;
3504 Uint32 mysql_version = req->mysql_version;
3505 if (version < NDBD_SPLIT_VERSION)
3508 NodeRecPtr apiNodePtr;
3509 apiNodePtr.i = refToNode(ref);
3510 ptrCheckGuard(apiNodePtr, MAX_NODES, nodeRec);
3512 if (apiNodePtr.p->phase == ZFAIL_CLOSING)
3523 ndbout_c(
"Qmgr::execAPI_REGREQ: Recd API_REGREQ (NodeId=%d)", apiNodePtr.i);
3526 bool compatability_check;
3527 const char *
extra = 0;
3531 if (m_micro_gcp_enabled && !ndb_check_micro_gcp(version))
3534 compatability_check =
false;
3535 extra =
": micro gcp enabled";
3540 compatability_check = ndbCompatible_ndb_api(NDB_VERSION, version);
3544 compatability_check = ndbCompatible_ndb_mgmt(NDB_VERSION, version);
3549 sendApiRegRef(signal, ref, ApiRegRef::WrongType);
3550 infoEvent(
"Invalid connection attempt with type %d", type);
3554 if (!compatability_check) {
3556 char buf[NDB_VERSION_STRING_BUF_SZ];
3557 infoEvent(
"Connection attempt from %s id=%d with %s "
3558 "incompatible with %s%s",
3559 type ==
NodeInfo::API ?
"api or mysqld" :
"management server",
3561 ndbGetVersionString(version, mysql_version, 0,
3565 extra ? extra :
"");
3566 apiNodePtr.p->phase = ZAPI_INACTIVE;
3567 sendApiRegRef(signal, ref, ApiRegRef::UnsupportedVersion);
3571 setNodeInfo(apiNodePtr.i).
m_version = version;
3576 if (apiNodePtr.p->phase == ZAPI_INACTIVE)
3578 apiNodePtr.p->blockRef = ref;
3582 state.starting.startPhase >= 100)))
3589 apiNodePtr.p->phase = ZAPI_ACTIVE;
3591 enableComReq->m_senderRef = reference();
3592 enableComReq->m_senderData = ENABLE_COM_API_REGREQ;
3593 NodeBitmask::clear(enableComReq->m_nodeIds);
3594 NodeBitmask::set(enableComReq->m_nodeIds, apiNodePtr.i);
3595 sendSignal(CMVMI_REF, GSN_ENABLE_COMREQ, signal,
3596 EnableComReq::SignalLength, JBA);
3601 sendApiRegConf(signal, apiNodePtr.i);
// Continuation of API registration once communication has been enabled
// (called from execENABLE_COMCONF for ENABLE_COM_API_REGREQ).
// Visible steps: fold the node's version into the per-type version info,
// send NODE_VERSION_REP (node, version) through sendVersionedDb() to the
// receiver group `rg` with the own node removed, set theData[0] = node for
// a further signal, and reply to the node via sendApiRegConf().
// NOTE(review): where `type`, `version` and `rg` come from, and the signal
// sent after the second theData[0] assignment, are not visible here.
3605 Qmgr::handleEnableComApiRegreq(
Signal *signal, Uint32 node)
3609 recompute_version_info(type, version);
3611 signal->theData[0] = node;
3612 signal->theData[1] = version;
3614 rg.m_nodes.clear(getOwnNodeId());
3615 sendVersionedDb(rg, GSN_NODE_VERSION_REP, signal, 2, JBB,
3616 NDBD_NODE_VERSION_REP);
3618 signal->theData[0] = node;
3621 sendApiRegConf(signal, node);
// Sends API_REGCONF to the block reference recorded for API node `node`.
// Fields set here: qmgrRef, apiHeartbeatFrequency (chbApiDelay / 10),
// version, mysql_version, nodeState.dynamicId, minDbVersion (minimum
// version recorded for DB nodes) and nodeState.m_connected_nodes (copy of
// c_connectedNodes).
// dynamicId is the own node's dynamic id when the master node id in the
// node state differs from the own node id; the negated form also appears.
// NOTE(review): presumably the negated value is the else-branch (this node
// is master) - the branch structure is not visible in this excerpt.
3625 Qmgr::sendApiRegConf(
Signal *signal, Uint32 node)
3627 NodeRecPtr apiNodePtr;
3628 apiNodePtr.i = node;
3629 ptrCheckGuard(apiNodePtr, MAX_NODES, nodeRec);
3630 const BlockReference ref = apiNodePtr.p->blockRef;
3631 ndbassert(ref != 0);
3634 apiRegConf->qmgrRef = reference();
3635 apiRegConf->apiHeartbeatFrequency = (chbApiDelay / 10);
3636 apiRegConf->version = NDB_VERSION;
3637 apiRegConf->mysql_version = NDB_MYSQL_VERSION_D;
3641 nodePtr.i = getOwnNodeId();
3642 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
3643 Uint32 dynamicId = nodePtr.p->ndynamicId;
3645 if(apiRegConf->nodeState.masterNodeId != getOwnNodeId()){
3647 apiRegConf->nodeState.dynamicId = dynamicId;
3649 apiRegConf->nodeState.dynamicId = (Uint32)(-(Int32)dynamicId);
3653 apiRegConf->minDbVersion = info.m_type[
NodeInfo::DB].m_min_version;
3654 apiRegConf->nodeState.m_connected_nodes.
assign(c_connectedNodes);
3655 sendSignal(ref, GSN_API_REGCONF, signal, ApiRegConf::SignalLength, JBB);
// NOTE(review): the first line of this definition is not visible in this
// excerpt; judging by the parameter list and the call in
// handleEnableComApiRegreq this is presumably Qmgr::sendVersionedDb.
// Visible behaviour: when the minimum version recorded for DB nodes is at
// least `minversion`, the signal is sent to the whole receiver group in one
// call. Otherwise the nodes in rg.m_nodes are visited one by one and the
// signal is sent individually; `cnt` counts sends and is asserted to be
// lower than the group size (or both zero).
// NOTE(review): the per-node version test guarding the individual send is
// not visible here.
3660 GlobalSignalNumber gsn,
3663 JobBufferLevel jbuf,
3668 if (info.m_type[
NodeInfo::DB].m_min_version >= minversion)
3671 sendSignal(rg, gsn, signal, length, jbuf);
3676 Uint32 i = 0, cnt = 0;
3677 while((i = rg.m_nodes.
find(i + 1)) != NodeBitmask::NotFound)
3684 sendSignal(numberToRef(rg.m_block, i), gsn, signal, length, jbuf);
3687 ndbassert((cnt == 0 && rg.m_nodes.
count() == 0) ||
3688 (cnt < rg.m_nodes.
count()));
// Handles API_VERSION_REQ: answers the requester (req->senderRef) with an
// API_VERSION_CONF about node req->nodeId.
// Visible fields: inet_addr is taken from the transporter registry's
// connect address for the node, mysql_version is set to 0 on one path, and
// nodeId echoes the request.
// NOTE(review): the version fields and the condition around the
// mysql_version = 0 assignment are not visible in this excerpt.
3693 Qmgr::execAPI_VERSION_REQ(
Signal * signal) {
3697 Uint32 senderRef = req->senderRef;
3698 Uint32 nodeId = req->nodeId;
3705 struct in_addr in= globalTransporterRegistry.get_connect_address(nodeId);
3706 conf->inet_addr= in.s_addr;
3711 conf->mysql_version = 0;
3714 conf->nodeId = nodeId;
3716 sendSignal(senderRef,
3717 GSN_API_VERSION_CONF,
3719 ApiVersionConf::SignalLength, JBB);
// Handles NODE_VERSION_REP: theData[0] is a node id and theData[1] its
// version. For node ids below MAX_NODES the version is stored in the node
// info and folded into the per-type min/max version bookkeeping.
// NOTE(review): how `type` is determined is not visible in this excerpt.
3723 Qmgr::execNODE_VERSION_REP(
Signal* signal)
3726 Uint32 nodeId = signal->theData[0];
3727 Uint32 version = signal->theData[1];
3729 if (nodeId < MAX_NODES)
3733 setNodeInfo(nodeId).
m_version = version;
3734 recompute_version_info(type, version);
// Incrementally folds one node's `version` into the min/max version record
// for node type `type`.
// The minimum is replaced when it is still 0 (treated as "unset") or when
// `version` is lower; the maximum is replaced when `version` is higher.
// NOTE(review): lines between the signature and the first comparison are
// not visible in this excerpt.
3739 Qmgr::recompute_version_info(Uint32 type, Uint32 version)
3751 if (info.m_type[type].m_min_version == 0 ||
3752 version < info.m_type[type].m_min_version)
3753 info.m_type[
type].m_min_version = version;
3754 if (version > info.m_type[type].m_max_version)
3755 info.m_type[
type].m_max_version = version;
// Recomputes the min/max version record for node type `type` from scratch.
// The scan covers ids 1..MAX_NDB_NODES-1 for NodeInfo::DB and
// 1..MAX_NODES-1 for every other type. `min` starts at all-ones and is
// stored as 0 when no version lowered it; `max` starts at 0.
// NOTE(review): the loop body (which nodes contribute) is not visible in
// this excerpt.
3759 Qmgr::recompute_version_info(Uint32 type)
3770 Uint32 min = ~0, max = 0;
3771 Uint32 cnt = type ==
NodeInfo::DB ? MAX_NDB_NODES : MAX_NODES;
3772 for (Uint32 i = 1; i<cnt; i++)
3789 info.m_type[
type].m_min_version = min == ~(Uint32)0 ? 0 : min;
3790 info.m_type[
type].m_max_version = max;
// Compares an API node's version with this node's own version.
// The visible condition is true when the API's major or minor number is
// lower than the own one while apiVersion is still at least
// API_UPGRADE_VERSION.
// NOTE(review): the return type, the values returned for each outcome and
// any use of nodeId are not visible in this excerpt.
3795 Qmgr::checkAPIVersion(NodeId nodeId,
3796 Uint32 apiVersion, Uint32 ownVersion)
const {
3801 if ((getMajor(apiVersion) < getMajor(ownVersion) ||
3802 getMinor(apiVersion) < getMinor(ownVersion)) &&
3803 apiVersion >= API_UPGRADE_VERSION) {
3821 Qmgr::sendApiRegRef(
Signal* signal, Uint32 Tref, ApiRegRef::ErrorCode err){
// Sends GSN_API_REGREF to Tref on JBB.
//   signal - signal object used to build and send the reply
//   Tref   - block reference the refusal is sent to
//   err    - error code placed in the reply
// The reply also carries our own block reference, NDB_VERSION and
// NDB_MYSQL_VERSION_D.
3823 ref->ref = reference();
3824 ref->version = NDB_VERSION;
3825 ref->mysql_version = NDB_MYSQL_VERSION_D;
3826 ref->errorCode = err;
3827 sendSignal(Tref, GSN_API_REGREF, signal, ApiRegRef::SignalLength, JBB);
// Common handling of a reported node failure.
//   signal      - current signal; its sender and length are inspected below
//   aFailedNode - id of the node reported as failed (checked against
//                 MAX_NDB_NODES)
//   aFailCause  - FailRep cause code
// sourceNode, used throughout the body, is passed on to failReport() as the
// source of the report.
// If the failed node is this node, a reason text and exit code are selected
// from aFailCause. Otherwise the failure is registered through failReport()
// and, when this node is president with no takeover active and the number of
// failed nodes changed, a new failure number is allocated and PREP_FAILREQ is
// sent to every ZRUNNING node.
3835 void Qmgr::failReportLab(
Signal* signal, Uint16 aFailedNode,
3836 FailRep::FailCause aFailCause,
3840 NodeRecPtr failedNodePtr;
3841 NodeRecPtr myNodePtr;
3842 UintR TnoFailedNodes;
3844 failedNodePtr.i = aFailedNode;
3845 ptrCheckGuard(failedNodePtr, MAX_NDB_NODES, nodeRec);
3848 if (check_multi_node_shutdown(signal))
// Connectivity-check or link failure reports that come from a node whose own
// connectivity is suspect are handed to handleFailFromSuspect(). A suspect
// source can never be this node itself (ndbrequire).
3854 if (isNodeConnectivitySuspect(sourceNode) &&
3856 ((aFailCause == FailRep::ZCONNECT_CHECK_FAILURE) ||
3857 (aFailCause == FailRep::ZLINK_FAILURE)))
3863 ndbrequire(sourceNode != getOwnNodeId());
3865 handleFailFromSuspect(signal,
// This node itself is the one declared failed: pick an exit code and a
// human-readable reason. msg stays 0 for causes without a text and is then
// printed as "<Unknown>".
3872 if (failedNodePtr.i == getOwnNodeId()) {
3875 Uint32
code = NDBD_EXIT_NODE_DECLARED_DEAD;
3876 const char *
msg = 0;
3879 case FailRep::ZOWN_FAILURE:
3880 msg =
"Own failure";
3882 case FailRep::ZOTHER_NODE_WHEN_WE_START:
3883 case FailRep::ZOTHERNODE_FAILED_DURING_START:
3884 msg =
"Other node died during start";
3886 case FailRep::ZIN_PREP_FAIL_REQ:
3889 case FailRep::ZSTART_IN_REGREQ:
3890 msg =
"Start timeout";
3892 case FailRep::ZHEARTBEAT_FAILURE:
3893 msg =
"Heartbeat failure";
3895 case FailRep::ZLINK_FAILURE:
3896 msg =
"Connection failure";
// Partitioned cluster gets its own exit code. The other partition is read
// from the signal only when it is a FAIL_REP whose length includes the
// partitioned extra words.
3898 case FailRep::ZPARTITIONED_CLUSTER:
3900 code = NDBD_EXIT_PARTITIONED_SHUTDOWN;
3901 char buf1[100], buf2[100];
3903 if (((signal->getLength()== FailRep::OrigSignalLength + FailRep::PartitionedExtraLength) ||
3904 (signal->getLength()== FailRep::SignalLength + FailRep::PartitionedExtraLength)) &&
3905 signal->header.theVerId_signalNumber == GSN_FAIL_REP)
3909 part.
assign(NdbNodeBitmask::Size, rep->partitioned.partition);
3912 "Our cluster: %s other cluster: %s",
3919 "Our cluster: %s", buf1);
3924 case FailRep::ZMULTI_NODE_SHUTDOWN:
3925 msg =
"Multi node shutdown";
3927 case FailRep::ZCONNECT_CHECK_FAILURE:
3928 msg =
"Connectivity check failure";
3934 CRASH_INSERTION(932);
3935 CRASH_INSERTION(938);
3939 "We(%u) have been declared dead by %u (via %u) reason: %s(%u)",
3942 refToNode(signal->getSendersBlockRef()),
3943 msg ? msg :
"<Unknown>",
// Another node failed. If this node is not in ZRUNNING phase it takes the
// system error path.
3950 myNodePtr.i = getOwnNodeId();
3951 ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
3952 if (myNodePtr.p->phase != ZRUNNING) {
3954 systemErrorLab(signal, __LINE__);
3961 CRASH_INSERTION(932);
3962 CRASH_INSERTION(938);
3965 progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
// Remember the failed-node count so we can tell afterwards whether
// failReport() actually registered a new failure.
3969 TnoFailedNodes = cnoFailedNodes;
3970 failReport(signal, failedNodePtr.i, (UintR)ZTRUE, aFailCause, sourceNode);
3971 if (cpresident == getOwnNodeId()) {
3973 if (ctoStatus == Q_NOT_ACTIVE) {
// A new failure was registered: bump the failure number and send PREP_FAILREQ
// to every node in ZRUNNING phase.
3985 if (TnoFailedNodes != cnoFailedNodes) {
3987 cfailureNr = cfailureNr + 1;
3989 nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
3991 ptrAss(nodePtr, nodeRec);
3992 if (nodePtr.p->phase == ZRUNNING) {
3994 sendPrepFailReq(signal, nodePtr.i);
// Handles PREP_FAILREQ: records the sender's block reference, the failure
// number and the list of nodes to prepare as failed, issues a BlockCommitOrd
// carrying the failure number, processes each listed node with cause
// ZIN_PREP_FAIL_REQ and then sends CLOSE_COMREQ via sendCloseComReq().
// Side effects on block state: cnoPrepFailedNodes/cprepFailedNodes are
// rebuilt, cnoCommitFailedNodes is reset to 0 and cprepareFailureNr is set to
// the received failure number.
4011 void Qmgr::execPREP_FAILREQ(
Signal* signal)
4013 NodeRecPtr myNodePtr;
4018 if (check_multi_node_shutdown(signal))
4026 BlockReference Tblockref = prepFail->xxxBlockRef;
4027 Uint16 TfailureNr = prepFail->failNo;
4028 cnoPrepFailedNodes = prepFail->noOfNodes;
4029 UintR arrayIndex = 0;
// Rebuild the cprepFailedNodes array from the signal by scanning all node ids
// (presumably testing a node bitmask in the signal - verify).
4031 for (Tindex = 0; Tindex < MAX_NDB_NODES; Tindex++) {
4033 cprepFailedNodes[arrayIndex] = Tindex;
4045 block->failNo = TfailureNr;
4047 BlockCommitOrd::SignalLength);
// This node must itself be in ZRUNNING phase, otherwise it takes the system
// error path.
4049 myNodePtr.i = getOwnNodeId();
4050 ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
4051 if (myNodePtr.p->phase != ZRUNNING) {
4053 systemErrorLab(signal, __LINE__);
4060 CRASH_INSERTION(932);
4061 CRASH_INSERTION(938);
4064 progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
// NOTE(review): guard0 wraps around if cnoPrepFailedNodes is 0; arrGuard
// below then trips. Presumably the sender never sends an empty list - confirm.
4068 guard0 = cnoPrepFailedNodes - 1;
4069 arrGuard(guard0, MAX_NDB_NODES);
4070 for (Tindex = 0; Tindex <= guard0; Tindex++) {
4073 cprepFailedNodes[Tindex],
4075 FailRep::ZIN_PREP_FAIL_REQ,
4078 sendCloseComReq(signal, Tblockref, TfailureNr);
4079 cnoCommitFailedNodes = 0;
4080 cprepareFailureNr = TfailureNr;
// Handles the API-failure flavour of CLOSE_COMCONF. Scans node ids up to
// MAX_NODES for the node the confirmation refers to, requires that the signal
// names exactly one node, and requires that the node's failState is one of
// the two WAITING_FOR_CLOSECOMCONF_* states. In the ACTIVE state the API
// failure is reported with sendApiFailReq(..., false) and arbitration is
// informed (ArbitCode::ApiFail); the other visible call uses
// sendApiFailReq(..., true).
4085 void Qmgr::handleApiCloseComConf(
Signal* signal)
4091 for(Uint32 nodeId = 0; nodeId < MAX_NODES; nodeId ++)
// Exactly one node per signal: after clearing this node's bit the mask must
// be empty.
4100 ndbrequire(closeCom->noOfNodes == 1);
4101 NodeBitmask::clear(closeCom->theNodes, nodeId);
4102 ndbrequire(NodeBitmask::isclear(closeCom->theNodes));
4108 NodeRecPtr failedNodePtr;
4109 failedNodePtr.i = nodeId;
4110 ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
4112 ndbrequire((failedNodePtr.p->failState ==
4113 WAITING_FOR_CLOSECOMCONF_ACTIVE) ||
4114 (failedNodePtr.p->failState ==
4115 WAITING_FOR_CLOSECOMCONF_NOTACTIVE));
4117 if (failedNodePtr.p->failState == WAITING_FOR_CLOSECOMCONF_ACTIVE)
4123 sendApiFailReq(signal, nodeId,
false);
4124 arbitRec.code = ArbitCode::ApiFail;
4125 handleArbitApiFail(signal, nodeId);
4133 sendApiFailReq(signal, nodeId,
true);
// Handles CLOSE_COMCONF. API-failure confirmations are delegated to
// handleApiCloseComConf(). For node-failure confirmations the prepared-failed
// node list is rebuilt from the signal and compared with the locally known
// failed nodes (cfailedNodes):
//  - if a locally failed node is missing from the prepared list it is
//    appended, the merged list becomes the new cfailedNodes and the merged
//    list is sent via sendPrepFailReqRef();
//  - the other visible path copies the prepared list to ccommitFailedNodes
//    and sends PREP_FAILCONF (own node id, failure number) to the requester.
4162 void Qmgr::execCLOSE_COMCONF(
Signal* signal)
4168 Uint32 requestType = closeCom->requestType;
4170 if (requestType == CloseComReqConf::RT_API_FAILURE)
4173 handleApiCloseComConf(signal);
4178 ndbassert(requestType == CloseComReqConf::RT_NODE_FAILURE);
4179 BlockReference Tblockref = closeCom->xxxBlockRef;
4180 Uint16 TfailureNr = closeCom->failNo;
4182 cnoPrepFailedNodes = closeCom->noOfNodes;
4183 UintR arrayIndex = 0;
4185 for(Tindex = 0; Tindex < MAX_NDB_NODES; Tindex++){
4187 cprepFailedNodes[arrayIndex] = Tindex;
// The number of nodes collected must match the count carried in the signal.
4191 ndbassert(arrayIndex == cnoPrepFailedNodes);
4192 UintR tprepFailConf;
4197 Uint16 TfailedNodeNo;
// tprepFailConf stays ZTRUE only if every locally known failed node is also
// part of the prepared list.
4199 tprepFailConf = ZTRUE;
4200 if (cnoFailedNodes > 0) {
4210 guard0 = cnoFailedNodes - 1;
4211 arrGuard(guard0, MAX_NDB_NODES);
4212 for (Tindex = 0; Tindex <= guard0; Tindex++) {
4214 TfailedNodeNo = cfailedNodes[Tindex];
4216 guard1 = cnoPrepFailedNodes - 1;
4217 arrGuard(guard1, MAX_NDB_NODES);
4218 for (Tindex2 = 0; Tindex2 <= guard1; Tindex2++) {
4220 if (TfailedNodeNo == cprepFailedNodes[Tindex2]) {
// Locally failed node not covered by the request: refuse and extend the
// prepared list with it.
4225 if (Tfound == ZFALSE) {
4231 tprepFailConf = ZFALSE;
4232 arrGuard(cnoPrepFailedNodes, MAX_NDB_NODES);
4233 cprepFailedNodes[cnoPrepFailedNodes] = TfailedNodeNo;
4234 cnoPrepFailedNodes = cnoPrepFailedNodes + 1;
4238 if (tprepFailConf == ZFALSE) {
4244 for (Tindex = 0; Tindex < MAX_NDB_NODES; Tindex++) {
4245 cfailedNodes[Tindex] = cprepFailedNodes[Tindex];
4247 cnoFailedNodes = cnoPrepFailedNodes;
4248 sendPrepFailReqRef(signal,
// Lists agree: the prepared nodes become the nodes to commit.
4260 cnoCommitFailedNodes = cnoPrepFailedNodes;
4261 guard0 = cnoPrepFailedNodes - 1;
4262 arrGuard(guard0, MAX_NDB_NODES);
4263 for (Tindex = 0; Tindex <= guard0; Tindex++) {
4265 arrGuard(Tindex, MAX_NDB_NODES);
4266 ccommitFailedNodes[Tindex] = cprepFailedNodes[Tindex];
4268 signal->theData[0] = getOwnNodeId();
4269 signal->theData[1] = TfailureNr;
4270 sendSignal(Tblockref, GSN_PREP_FAILCONF, signal, 2, JBA);
// Handles PREP_FAILCONF: theData[0] is the replying node, theData[1] the
// failure number it confirms. The reply is matched against cfailureNr, the
// replying node is marked as no longer awaiting a reply and the ZRUNNING
// nodes are scanned for outstanding requests (presumably returning while any
// are still Q_ACTIVE - verify). Afterwards the arbitration failure number is
// set and, depending on the arbitration method, either COMMIT_FAILREQ is sent
// directly (DISABLED) or the arbitration check is run (EXTERNAL / DEFAULT).
4281 void Qmgr::execPREP_FAILCONF(
Signal* signal)
4284 NodeRecPtr replyNodePtr;
4286 replyNodePtr.i = signal->theData[0];
4287 Uint16 TfailureNr = signal->theData[1];
4288 if (TfailureNr != cfailureNr) {
4296 ptrCheckGuard(replyNodePtr, MAX_NDB_NODES, nodeRec);
4297 replyNodePtr.p->sendPrepFailReqStatus = Q_NOT_ACTIVE;
4298 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4300 ptrAss(nodePtr, nodeRec);
4301 if (nodePtr.p->phase == ZRUNNING) {
4302 if (nodePtr.p->sendPrepFailReqStatus == Q_ACTIVE) {
4312 arbitRec.failureNr = cfailureNr;
4323 switch(arbitRec.method){
4324 case ArbitRec::DISABLED:
4327 sendCommitFailReq(signal);
4330 case ArbitRec::METHOD_EXTERNAL:
4331 case ArbitRec::METHOD_DEFAULT:
4333 handleArbitCheck(signal);
4341 Qmgr::sendCommitFailReq(
Signal* signal)
// Sends COMMIT_FAILREQ (president's reference, current failure number) on JBA
// to every node in ZRUNNING phase and marks each as awaiting a reply. The
// arbitration failure number is first compared with cfailureNr. Finally
// ctoStatus is set to Q_ACTIVE.
4345 if (arbitRec.failureNr != cfailureNr) {
4357 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4359 ptrAss(nodePtr, nodeRec);
// Error-insert 935 test hook; the leading "false &&" makes this branch
// unreachable as written.
4362 if (
false && ERROR_INSERTED(935) && nodePtr.i == c_error_insert_extra)
4364 ndbout_c(
"skipping node %d", c_error_insert_extra);
4365 CLEAR_ERROR_INSERT_VALUE;
4366 signal->theData[0] = 9999;
4367 sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 1000, 1);
4372 if (nodePtr.p->phase == ZRUNNING) {
4374 nodePtr.p->sendCommitFailReqStatus = Q_ACTIVE;
4375 signal->theData[0] = cpdistref;
4376 signal->theData[1] = cfailureNr;
4377 sendSignal(nodePtr.p->blockRef, GSN_COMMIT_FAILREQ, signal, 2, JBA);
4380 ctoStatus = Q_ACTIVE;
// Handles PREP_FAILREF: the refusing node returns its own list of failed
// nodes. The prepared list is rebuilt from the signal and the reply is
// matched against cfailureNr. The received list then replaces cfailedNodes, a
// new failure number is allocated and PREP_FAILREQ is sent again to every
// ZRUNNING node.
4392 void Qmgr::execPREP_FAILREF(
Signal* signal)
4399 Uint16 TfailureNr = prepFail->failNo;
4400 cnoPrepFailedNodes = prepFail->noOfNodes;
4402 UintR arrayIndex = 0;
4404 for(Tindex = 0; Tindex < MAX_NDB_NODES; Tindex++) {
4408 cprepFailedNodes[arrayIndex] = Tindex;
4412 if (TfailureNr != cfailureNr) {
// Adopt the list from the signal as the set of failed nodes.
4423 cnoFailedNodes = cnoPrepFailedNodes;
4424 guard0 = cnoPrepFailedNodes - 1;
4425 arrGuard(guard0, MAX_NDB_NODES);
4426 for (Ti = 0; Ti <= guard0; Ti++) {
4428 cfailedNodes[Ti] = cprepFailedNodes[Ti];
// Start a new round with the next failure number.
4430 cfailureNr = cfailureNr + 1;
4431 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4433 ptrAss(nodePtr, nodeRec);
4434 if (nodePtr.p->phase == ZRUNNING) {
4436 sendPrepFailReq(signal, nodePtr.i);
4444 clear_nodes(Uint32 dstcnt, Uint16 dst[], Uint32 srccnt,
const Uint16 src[])
// Helper working on two node-id arrays.
//   dstcnt, dst - number of entries in, and contents of, the list being
//                 processed
//   srccnt, src - number of entries in, and contents of, the list each dst
//                 entry is compared against
// Callers assign the return value to the dst list's element count, so it is
// presumably the number of entries left in dst after removing those that
// also occur in src - verify against the full body.
4450 for (Uint32 i = 0; i<dstcnt; i++)
4452 Uint32 node = dst[
i];
4453 for (Uint32 j = 0; j<srccnt; j++)
// Handles COMMIT_FAILREQ: theData[0] is the sender's block reference,
// theData[1] the failure number to commit. The sender reference is compared
// with the president's reference (cpdistref). An UnblockCommitOrd carrying
// the failure number is issued. If this failure number has not been committed
// yet and there are nodes waiting to be committed:
//  - NODE_FAILREP with the committed node set goes to NDBCNTR,
//  - each committed node is moved to ZFAIL_CLOSING / WAITING_FOR_NDB_FAILCONF
//    and removed from c_clusterNodes,
//  - NODE_FAILREP is sent to every node in ZAPI_ACTIVE phase,
//  - the committed nodes are removed from the failed and prepared lists.
// COMMIT_FAILCONF with our node id is sent back to the sender on JBA.
4475 void Qmgr::execCOMMIT_FAILREQ(
Signal* signal)
4480 CRASH_INSERTION(935);
4482 BlockReference Tblockref = signal->theData[0];
4483 UintR TfailureNr = signal->theData[1];
4484 if (Tblockref != cpdistref) {
4497 unblock->failNo = TfailureNr;
4499 UnblockCommitOrd::SignalLength);
4501 if ((ccommitFailureNr != TfailureNr) &&
4502 (cnoCommitFailedNodes > 0)) {
4508 ccommitFailureNr = TfailureNr;
// Build the NODE_FAILREP payload: failure number, node count and a bitmask of
// the committed failed nodes.
4511 nodeFail->failNo = ccommitFailureNr;
4512 nodeFail->noOfNodes = cnoCommitFailedNodes;
4514 NdbNodeBitmask::clear(nodeFail->theNodes);
4515 for(
unsigned i = 0; i < cnoCommitFailedNodes; i++) {
4517 NdbNodeBitmask::set(nodeFail->theNodes, ccommitFailedNodes[i]);
// Error-insert 936 delays the report to NDBCNTR by 200 instead of sending it
// immediately.
4520 if (ERROR_INSERTED(936))
4522 sendSignalWithDelay(NDBCNTR_REF, GSN_NODE_FAILREP, signal,
4523 200, NodeFailRep::SignalLength);
4527 sendSignal(NDBCNTR_REF, GSN_NODE_FAILREP, signal,
4528 NodeFailRep::SignalLength, JBB);
4531 guard0 = cnoCommitFailedNodes - 1;
4532 arrGuard(guard0, MAX_NDB_NODES);
4537 for (Tj = 0; Tj <= guard0; Tj++) {
4539 nodePtr.i = ccommitFailedNodes[Tj];
4540 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
4541 nodePtr.p->phase = ZFAIL_CLOSING;
4542 nodePtr.p->failState = WAITING_FOR_NDB_FAILCONF;
4545 c_clusterNodes.
clear(nodePtr.i);
// Tell every active API node about the failure; the payload is rebuilt for
// each send.
4551 for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++) {
4553 ptrAss(nodePtr, nodeRec);
4554 if (nodePtr.p->phase == ZAPI_ACTIVE) {
4559 nodeFail->failNo = ccommitFailureNr;
4560 nodeFail->noOfNodes = cnoCommitFailedNodes;
4561 NdbNodeBitmask::clear(nodeFail->theNodes);
4562 for(
unsigned i = 0; i < cnoCommitFailedNodes; i++) {
4564 NdbNodeBitmask::set(nodeFail->theNodes, ccommitFailedNodes[i]);
4566 sendSignal(nodePtr.p->blockRef, GSN_NODE_FAILREP, signal,
4567 NodeFailRep::SignalLength, JBB);
// Drop the now-committed nodes from the failed and prepared lists and reset
// the commit list.
4574 cnoFailedNodes = clear_nodes(cnoFailedNodes,
4576 cnoCommitFailedNodes,
4577 ccommitFailedNodes);
4578 cnoPrepFailedNodes = clear_nodes(cnoPrepFailedNodes,
4580 cnoCommitFailedNodes,
4581 ccommitFailedNodes);
4582 cnoCommitFailedNodes = 0;
4588 signal->theData[0] = getOwnNodeId();
4589 sendSignal(Tblockref, GSN_COMMIT_FAILCONF, signal, 1, JBA);
// Handles COMMIT_FAILCONF: theData[0] is the replying node. The node is
// marked as no longer awaiting a commit reply and the ZRUNNING nodes are
// scanned for outstanding replies (presumably returning while any are still
// Q_ACTIVE - verify). Afterwards the takeover status is cleared and, if more
// failed nodes have accumulated meanwhile, a new failure number is allocated
// and PREP_FAILREQ is sent to every ZRUNNING node.
4599 void Qmgr::execCOMMIT_FAILCONF(
Signal* signal)
4602 NodeRecPtr replyNodePtr;
4604 replyNodePtr.i = signal->theData[0];
4606 ptrCheckGuard(replyNodePtr, MAX_NDB_NODES, nodeRec);
4607 replyNodePtr.p->sendCommitFailReqStatus = Q_NOT_ACTIVE;
4608 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4610 ptrAss(nodePtr, nodeRec);
4611 if (nodePtr.p->phase == ZRUNNING) {
4612 if (nodePtr.p->sendCommitFailReqStatus == Q_ACTIVE) {
4621 ctoStatus = Q_NOT_ACTIVE;
4622 if (cnoFailedNodes != 0) {
4628 cfailureNr = cfailureNr + 1;
4629 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4631 ptrAss(nodePtr, nodeRec);
4632 if (nodePtr.p->phase == ZRUNNING) {
4634 sendPrepFailReq(signal, nodePtr.i);
// Handles PRES_TOCONF: theData[0] is the replying node, theData[1] the
// failure number it reports. ctoFailureNr tracks the highest number reported
// so far. The replying node is marked as answered and the nodes are scanned
// for outstanding PRES_TOREQ replies (presumably returning while any are
// still Q_ACTIVE - verify).
// If some node reported a failure number above our last committed one,
// COMMIT_FAILREQ for that number is sent to all ZRUNNING nodes. The other
// visible path clears the takeover status, allocates a new failure number and
// sends PREP_FAILREQ to all ZRUNNING nodes.
4648 void Qmgr::execPRES_TOCONF(
Signal* signal)
4651 NodeRecPtr replyNodePtr;
4653 replyNodePtr.i = signal->theData[0];
4654 UintR TfailureNr = signal->theData[1];
4655 if (ctoFailureNr < TfailureNr) {
4657 ctoFailureNr = TfailureNr;
4659 ptrCheckGuard(replyNodePtr, MAX_NDB_NODES, nodeRec);
4660 replyNodePtr.p->sendPresToStatus = Q_NOT_ACTIVE;
4661 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4663 ptrAss(nodePtr, nodeRec);
4664 if (nodePtr.p->sendPresToStatus == Q_ACTIVE) {
4672 if (ctoFailureNr > ccommitFailureNr) {
4674 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4676 ptrAss(nodePtr, nodeRec);
4677 if (nodePtr.p->phase == ZRUNNING) {
4679 nodePtr.p->sendCommitFailReqStatus = Q_ACTIVE;
4680 signal->theData[0] = cpdistref;
4681 signal->theData[1] = ctoFailureNr;
4682 sendSignal(nodePtr.p->blockRef, GSN_COMMIT_FAILREQ, signal, 2, JBA);
4690 ctoStatus = Q_NOT_ACTIVE;
4691 cfailureNr = cfailureNr + 1;
4692 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4694 ptrAss(nodePtr, nodeRec);
4695 if (nodePtr.p->phase == ZRUNNING) {
4697 sendPrepFailReq(signal, nodePtr.i);
// Handles READ_NODESREQ: theData[0] is the requester's block reference. The
// reply (READ_NODESCONF, sent on JBB) carries the number of defined nodes,
// the president as master node id, our own dynamic id, and the defined,
// cluster and inactive node bitmasks. The starting and started bitmasks are
// sent empty.
4706 void Qmgr::execREAD_NODESREQ(
Signal* signal)
4710 BlockReference TBref = signal->theData[0];
4715 nodePtr.i = getOwnNodeId();
4716 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
4721 readNodes->noOfNodes = c_definedNodes.
count();
4722 readNodes->masterNodeId = cpresident;
4723 readNodes->ndynamicId = nodePtr.p->ndynamicId;
4724 c_definedNodes.
copyto(NdbNodeBitmask::Size, readNodes->definedNodes);
4725 c_clusterNodes.
copyto(NdbNodeBitmask::Size, readNodes->clusterNodes);
4726 tmp.
copyto(NdbNodeBitmask::Size, readNodes->inactiveNodes);
4727 NdbNodeBitmask::clear(readNodes->startingNodes);
4728 NdbNodeBitmask::clear(readNodes->startedNodes);
4730 sendSignal(TBref, GSN_READ_NODESCONF, signal,
4731 ReadNodesConf::SignalLength, JBB);
// Shuts this node down because another node failed.
//   signal       - signal object used for the failure report
//   line         - source line reported to progError
//   failedNodeId - id of the node whose failure caused the shutdown
// Our own failure is first reported through failReport() with cause
// ZOWN_FAILURE, then progError is called with NDBD_EXIT_SR_OTHERNODEFAILED.
4734 void Qmgr::systemErrorBecauseOtherNodeFailed(
Signal* signal, Uint32 line,
4735 NodeId failedNodeId) {
4739 failReport(signal, getOwnNodeId(), (UintR)ZTRUE, FailRep::ZOWN_FAILURE, getOwnNodeId());
4743 "Node was shutdown during startup because node %d failed",
4746 progError(line, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
// Fatal-error exit for this block.
//   signal  - signal object used for the failure report
//   line    - source line reported to progError
//   message - text passed to progError
// Our own failure is first reported through failReport() with cause
// ZOWN_FAILURE, then progError is called with NDBD_EXIT_NDBREQUIRE.
4750 void Qmgr::systemErrorLab(
Signal* signal, Uint32 line,
const char *
message)
4754 failReport(signal, getOwnNodeId(), (UintR)ZTRUE, FailRep::ZOWN_FAILURE, getOwnNodeId());
4758 progError(line, NDBD_EXIT_NDBREQUIRE, message);
// Registers the failure of one node.
//   signal     - signal object reused for all messages sent from here
//   aFailCause - FailRep cause code forwarded in FAIL_REP
// The body also uses aFailedNode (the failing node), aSendFailRep (whether to
// broadcast FAIL_REP) and sourceNode (the reporter, must be non-zero when
// broadcasting).
// Only nodes currently in ZRUNNING phase are processed. For such a node:
//  - a president answers on the failed node's behalf for outstanding
//    COMMIT_FAILREQ / PRES_TOREQ by sending the CONF signals to itself,
//  - the node is moved to ZPREPARE_FAIL and its request states are cleared,
//  - FAIL_REP is optionally sent to the failed node and all ZRUNNING nodes,
//  - neighbours are recomputed,
//  - if the failed node was president, the ZRUNNING node with the lowest
//    dynamic id (low 16 bits) becomes president; if that is us, the takeover
//    is started with PRES_TOREQ or COMMIT_FAILREQ,
//  - the node is appended to cfailedNodes.
4768 void Qmgr::failReport(
Signal* signal,
4771 FailRep::FailCause aFailCause,
4774 UintR tfrMinDynamicId;
4775 NodeRecPtr failedNodePtr;
4777 NodeRecPtr presidentNodePtr;
4780 ndbassert((! aSendFailRep) || (sourceNode != 0));
4782 failedNodePtr.i = aFailedNode;
4783 ptrCheckGuard(failedNodePtr, MAX_NDB_NODES, nodeRec);
4784 if (failedNodePtr.p->phase == ZRUNNING) {
// Error-insert 938 test hook: once more than a quarter of the counted nodes
// have failed, DUMP 9991 is sent to CMVMI and error insert 932 is set.
4788 if (ERROR_INSERTED(938))
4791 ndbout_c(
"QMGR : execFAIL_REP : %u nodes have failed", nodeFailCount);
4793 Uint32 nodeCount = 0;
4794 for (Uint32 i = 1; i < MAX_NDB_NODES; i++)
4801 if (nodeFailCount > (nodeCount / 4))
4803 ndbout_c(
"QMGR : execFAIL_REP > 25%% nodes failed, resuming comms");
4805 signal->theData[0] = 9991;
4806 sendSignal(CMVMI_REF, GSN_DUMP_STATE_ORD, signal, 1, JBB);
4809 SET_ERROR_INSERT_VALUE(932);
// As president, fake the replies the failed node still owes us so the
// protocol rounds in progress can complete.
4815 if (cpresident == getOwnNodeId()) {
4817 if (failedNodePtr.p->sendCommitFailReqStatus == Q_ACTIVE) {
4819 signal->theData[0] = failedNodePtr.i;
4820 sendSignal(QMGR_REF, GSN_COMMIT_FAILCONF, signal, 1, JBA);
4822 if (failedNodePtr.p->sendPresToStatus == Q_ACTIVE) {
4824 signal->theData[0] = failedNodePtr.i;
4825 signal->theData[1] = ccommitFailureNr;
4826 sendSignal(QMGR_REF, GSN_PRES_TOCONF, signal, 2, JBA);
4829 failedNodePtr.p->phase = ZPREPARE_FAIL;
4830 failedNodePtr.p->sendPrepFailReqStatus = Q_NOT_ACTIVE;
4831 failedNodePtr.p->sendCommitFailReqStatus = Q_NOT_ACTIVE;
4832 failedNodePtr.p->sendPresToStatus = Q_NOT_ACTIVE;
// Broadcast FAIL_REP: first to the failed node itself (unless that is us),
// then to every node still in ZRUNNING phase.
4834 if (aSendFailRep == ZTRUE) {
4836 if (failedNodePtr.i != getOwnNodeId()) {
4839 failRep->failNodeId = failedNodePtr.i;
4840 failRep->failCause = aFailCause;
4841 failRep->failSourceNodeId = sourceNode;
4842 sendSignal(failedNodePtr.p->blockRef, GSN_FAIL_REP, signal,
4843 FailRep::SignalLength, JBA);
4845 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4847 ptrAss(nodePtr, nodeRec);
4848 if (nodePtr.p->phase == ZRUNNING) {
4851 failRep->failNodeId = failedNodePtr.i;
4852 failRep->failCause = aFailCause;
4853 failRep->failSourceNodeId = sourceNode;
4854 sendSignal(nodePtr.p->blockRef, GSN_FAIL_REP, signal,
4855 FailRep::SignalLength, JBA);
4859 if (failedNodePtr.i == getOwnNodeId()) {
// Let an ongoing connectivity check know about the failure; it may complete
// as a result.
4864 if (unlikely(m_connectivity_check.reportNodeFailure(failedNodePtr.i)))
4867 connectivityCheckCompleted(signal);
4870 failedNodePtr.p->ndynamicId = 0;
4871 findNeighbours(signal, __LINE__);
// The president failed: elect the ZRUNNING node with the smallest dynamic id
// (low 16 bits only).
4872 if (failedNodePtr.i == cpresident) {
4878 tfrMinDynamicId = (UintR)-1;
4879 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4881 ptrAss(nodePtr, nodeRec);
4882 if (nodePtr.p->phase == ZRUNNING) {
4883 if ((nodePtr.p->ndynamicId & 0xFFFF) < tfrMinDynamicId) {
4885 tfrMinDynamicId = (nodePtr.p->ndynamicId & 0xFFFF);
4886 cpresident = nodePtr.i;
4890 presidentNodePtr.i = cpresident;
4891 ptrCheckGuard(presidentNodePtr, MAX_NDB_NODES, nodeRec);
4892 cpdistref = presidentNodePtr.p->blockRef;
// We are the new president: continue from the last prepared failure number
// and start the takeover.
4893 if (cpresident == getOwnNodeId()) {
4894 CRASH_INSERTION(920);
4895 cfailureNr = cprepareFailureNr;
4897 ctoStatus = Q_ACTIVE;
4899 if (cnoCommitFailedNodes > 0) {
4906 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES;
4909 ptrAss(nodePtr, nodeRec);
4910 if (nodePtr.p->phase == ZRUNNING) {
4912 nodePtr.p->sendPresToStatus = Q_ACTIVE;
4913 signal->theData[0] = cpdistref;
4914 signal->theData[1] = cprepareFailureNr;
4915 sendSignal(nodePtr.p->blockRef, GSN_PRES_TOREQ,
4925 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES;
4928 ptrAss(nodePtr, nodeRec);
4929 if (nodePtr.p->phase == ZRUNNING) {
4931 nodePtr.p->sendCommitFailReqStatus = Q_ACTIVE;
4932 signal->theData[0] = cpdistref;
4933 signal->theData[1] = ccommitFailureNr;
4934 sendSignal(nodePtr.p->blockRef, GSN_COMMIT_FAILREQ, signal,
// Record the node in the list of failed nodes awaiting the failure protocol.
4941 arrGuard(cnoFailedNodes, MAX_NDB_NODES);
4942 cfailedNodes[cnoFailedNodes] = failedNodePtr.i;
4943 cnoFailedNodes = cnoFailedNodes + 1;
// Maps a dynamic id to the node id that owns it.
//   signal     - signal object, only used if the lookup fails
//   TdynamicId - dynamic id to look up (compared against the full ndynamicId)
// Scans node records 1..MAX_NDB_NODES-1. If no node has the given dynamic id
// (result still ZNIL) the system error path is taken.
4951 Uint16 Qmgr::translateDynamicIdToNodeId(
Signal* signal, UintR TdynamicId)
4953 NodeRecPtr tdiNodePtr;
4954 Uint16 TtdiNodeId = ZNIL;
4956 for (tdiNodePtr.i = 1; tdiNodePtr.i < MAX_NDB_NODES; tdiNodePtr.i++) {
4958 ptrAss(tdiNodePtr, nodeRec);
4959 if (tdiNodePtr.p->ndynamicId == TdynamicId) {
4961 TtdiNodeId = tdiNodePtr.i;
4965 if (TtdiNodeId == ZNIL) {
4967 systemErrorLab(signal, __LINE__);
// Sends CLOSE_COMREQ to CMVMI on JBA for the currently prepared failed nodes.
//   signal  - signal object used to build and send the request
//   TBRef   - block reference stored in the request (xxxBlockRef)
//   aFailNo - failure number stored in the request
// The request type is RT_NODE_FAILURE and the node set is built from
// cprepFailedNodes[0 .. cnoPrepFailedNodes-1].
4976 void Qmgr::sendCloseComReq(
Signal* signal, BlockReference TBRef, Uint16 aFailNo)
4980 closeCom->xxxBlockRef = TBRef;
4981 closeCom->requestType = CloseComReqConf::RT_NODE_FAILURE;
4982 closeCom->failNo = aFailNo;
4983 closeCom->noOfNodes = cnoPrepFailedNodes;
4985 NodeBitmask::clear(closeCom->theNodes);
// NOTE(review): signed loop index compared against cnoPrepFailedNodes;
// harmless for small counts but differs from the unsigned loops elsewhere.
4987 for(
int i = 0; i < cnoPrepFailedNodes; i++) {
4988 const NodeId nodeId = cprepFailedNodes[
i];
4990 NodeBitmask::set(closeCom->theNodes, nodeId);
4993 sendSignal(CMVMI_REF, GSN_CLOSE_COMREQ, signal,
4994 CloseComReqConf::SignalLength, JBA);
4999 Qmgr::sendPrepFailReqRef(
Signal* signal,
5001 GlobalSignalNumber gsn,
5005 const NodeId theNodes[]){
// Builds a PrepFailReqRef payload and sends it as signal `gsn` to dstBlockRef
// on JBA. The payload carries blockRef, failNo, noOfNodes and a bitmask with
// one bit set per entry of theNodes[0 .. noOfNodes-1].
5008 prepFail->xxxBlockRef = blockRef;
5009 prepFail->failNo = failNo;
5010 prepFail->noOfNodes = noOfNodes;
5012 NdbNodeBitmask::clear(prepFail->theNodes);
5014 for(Uint32 i = 0; i<noOfNodes; i++){
5015 const NodeId nodeId = theNodes[
i];
5016 NdbNodeBitmask::set(prepFail->theNodes, nodeId);
5019 sendSignal(dstBlockRef, gsn, signal, PrepFailReqRef::SignalLength, JBA);
// Sends a prepare-fail request to one node.
//   signal - signal object used for the send
//   aNode  - destination node id (checked against MAX_NDB_NODES)
// The node is marked as having an outstanding request (Q_ACTIVE) before the
// payload is sent through sendPrepFailReqRef() to the node's block reference.
5026 void Qmgr::sendPrepFailReq(
Signal* signal, Uint16 aNode)
5028 NodeRecPtr sendNodePtr;
5029 sendNodePtr.i = aNode;
5030 ptrCheckGuard(sendNodePtr, MAX_NDB_NODES, nodeRec);
5031 sendNodePtr.p->sendPrepFailReqStatus = Q_ACTIVE;
5033 sendPrepFailReqRef(signal,
5034 sendNodePtr.p->blockRef,
// Switch for the node-count ("one half") rule in handleArbitCheck(): when
// true, a partition with fewer than half of cnoOfNodes loses outright and a
// partitioned result with more than half wins. Currently disabled.
5051 static const bool g_ndb_arbit_one_half_rule =
false;
5057 Qmgr::execARBIT_CFG(
Signal* signal)
// Handles ARBIT_CFG: the signal's code field is the arbitrator rank, which
// must be 1 or 2. The signal's node mask is stored as the API mask for that
// rank and OR-ed into apiMask[0], which therefore accumulates the candidates
// of every rank received.
5061 unsigned rank = sd->code;
5062 ndbrequire(1 <= rank && rank <= 2);
5063 arbitRec.apiMask[0].
bitOR(sd->mask);
5064 arbitRec.apiMask[rank].
assign(sd->mask);
// Returns a delay value selected by the current arbitration state
// (arbitRec.state). Presumably the pause before the next arbitration thread
// step - confirm against the switch cases.
5070 Uint32 Qmgr::getArbitDelay()
5072 switch (arbitRec.state) {
// Returns a timeout value selected by the current arbitration state
// (arbitRec.state). The visible results are:
//   1000 + cnoOfNodes * heartbeat send delay,
//   1000 + arbitRec.timeout,
//   arbitRec.timeout.
// Which state maps to which result is decided by case labels; units are
// presumably milliseconds - confirm.
5105 Uint32 Qmgr::getArbitTimeout()
5107 switch (arbitRec.state) {
5120 return 1000 + cnoOfNodes * Uint32(hb_send_timer.getDelay());
5123 return 1000 + arbitRec.timeout;
5129 return arbitRec.timeout;
5146 Qmgr::handleArbitStart(
Signal* signal)
// Starts arbitration handling. May only be called on the president and only
// while the arbitration state is ARBIT_NULL (both enforced with ndbrequire).
// Moves the state machine to ARBIT_INIT and starts the arbitration thread.
5149 ndbrequire(cpresident == getOwnNodeId());
5150 ndbrequire(arbitRec.state == ARBIT_NULL);
5151 arbitRec.state = ARBIT_INIT;
5152 arbitRec.newstate =
true;
5153 startArbitThread(signal);
5162 Qmgr::handleArbitApiFail(
Signal* signal, Uint16 nodeId)
// Reacts to the failure of an API node with respect to arbitration.
//   signal - signal object passed on to the arbitration thread
//   nodeId - id of the failed API node
// The id is compared with the current arbitrator (arbitRec.node); presumably
// nothing needs to be done when they differ - verify. The reaction depends on
// the arbitration state: the president restarts from ARBIT_INIT, and the
// other visible assignment resets the state to ARBIT_NULL.
5164 if (arbitRec.node != nodeId) {
5170 switch (arbitRec.state) {
5185 if (cpresident == getOwnNodeId()) {
5187 arbitRec.state = ARBIT_INIT;
5188 arbitRec.newstate =
true;
5189 startArbitThread(signal);
5192 arbitRec.state = ARBIT_NULL;
5213 Qmgr::handleArbitNdbAdd(
Signal* signal, Uint16 nodeId)
// Reacts to a data node being added, with respect to arbitration. President
// only (ndbrequire).
//   signal - signal object passed on to the arbitration thread
//   nodeId - id of the added node
// Depending on the arbitration state, arbitration is either restarted from
// ARBIT_INIT or the node is recorded in arbitRec.newMask.
5216 ndbrequire(cpresident == getOwnNodeId());
5217 switch (arbitRec.state) {
5229 arbitRec.state = ARBIT_INIT;
5230 arbitRec.newstate =
true;
5231 startArbitThread(signal);
5237 arbitRec.newMask.
set(nodeId);
5258 Qmgr::handleArbitCheck(
Signal* signal)
// Decides, after a node failure, whether this partition may continue.
// President only (ndbrequire).
// Steps:
//  1. Optional node-count rule (g_ndb_arbit_one_half_rule): fewer than half
//     of cnoOfNodes alive => LoseNodes.
//  2. Node-group check via a direct CheckNodeGroups (ArbitCheck) request,
//     giving Win => WinGroups, Lose => LoseGroups or Partitioning; a
//     partitioned result with more than half the nodes is upgraded to
//     WinNodes when the node-count rule is enabled.
//  3. The resulting code selects the next arbitration state (ARBIT_INIT,
//     ARBIT_CHOOSE or ARBIT_CRASH).
//  4. Depending on that state the failure is committed right away
//     (sendCommitFailReq) or the arbitration thread is started.
5261 ndbrequire(cpresident == getOwnNodeId());
5263 computeArbitNdbMask(ndbMask);
5264 if (g_ndb_arbit_one_half_rule &&
5265 2 * ndbMask.
count() < cnoOfNodes) {
5267 arbitRec.code = ArbitCode::LoseNodes;
5271 sd->blockRef = reference();
5272 sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck;
5275 CheckNodeGroups::SignalLength);
5277 switch (sd->output) {
5278 case CheckNodeGroups::Win:
5280 arbitRec.code = ArbitCode::WinGroups;
5282 case CheckNodeGroups::Lose:
5284 arbitRec.code = ArbitCode::LoseGroups;
5286 case CheckNodeGroups::Partitioning:
5288 arbitRec.code = ArbitCode::Partitioning;
5289 if (g_ndb_arbit_one_half_rule &&
5290 2 * ndbMask.
count() > cnoOfNodes) {
5292 arbitRec.code = ArbitCode::WinNodes;
// Translate the decision code into the next state of the arbitration state
// machine.
5300 switch (arbitRec.code) {
5301 case ArbitCode::LoseNodes:
5303 case ArbitCode::LoseGroups:
5306 case ArbitCode::WinNodes:
5308 case ArbitCode::WinGroups:
5310 if (arbitRec.state == ARBIT_RUN) {
5314 arbitRec.state = ARBIT_INIT;
5315 arbitRec.newstate =
true;
// Possible partitioning: with a running arbitrator ask it to choose. The
// LoseNorun / LoseNocfg codes below distinguish whether any arbitrator
// candidate is configured (apiMask[0] non-empty).
5317 case ArbitCode::Partitioning:
5318 if (arbitRec.state == ARBIT_RUN) {
5320 arbitRec.state = ARBIT_CHOOSE;
5321 arbitRec.newstate =
true;
5324 if (arbitRec.apiMask[0].
count() != 0) {
5326 arbitRec.code = ArbitCode::LoseNorun;
5329 arbitRec.code = ArbitCode::LoseNocfg;
5335 arbitRec.state = ARBIT_CRASH;
5336 arbitRec.newstate =
true;
5340 switch (arbitRec.state) {
// Restrict the pending node masks to the nodes that are still alive before
// committing the failure.
5343 arbitRec.newMask.
bitAND(ndbMask);
5344 arbitRec.recvMask.
bitAND(ndbMask);
5345 sendCommitFailReq(signal);
5353 startArbitThread(signal);
5361 Qmgr::startArbitThread(
Signal* signal)
// Starts a new arbitration "thread" (a CONTINUEB-driven loop). President
// only (ndbrequire). The thread counter is incremented and passed in
// theData[1]; runArbitThread() compares it with arbitRec.thread on each
// iteration, so older loops can be told apart from this one.
5364 ndbrequire(cpresident == getOwnNodeId());
5365 arbitRec.code = ArbitCode::ThreadStart;
5367 signal->theData[1] = ++arbitRec.thread;
5368 runArbitThread(signal);
5376 Qmgr::runArbitThread(
Signal* signal)
// One iteration of the arbitration thread. theData[1] carries the thread
// number this iteration belongs to.
// Runs the handler for the current arbitration state, then reschedules itself
// with CONTINUEB (ZARBIT_HANDLING) using the delay from getArbitDelay().
// The ndbout lines print the arbitration record (state, thread, node, ticket,
// masks, counters, code); presumably compiled only in debug builds - verify.
5381 computeArbitNdbMask(ndbMask);
5382 ndbout <<
"arbit thread:";
5383 ndbout <<
" state=" << arbitRec.state;
5384 ndbout <<
" newstate=" << arbitRec.newstate;
5385 ndbout <<
" thread=" << arbitRec.thread;
5386 ndbout <<
" node=" << arbitRec.node;
5387 arbitRec.ticket.getText(buf,
sizeof(buf));
5388 ndbout <<
" ticket=" <<
buf;
5390 ndbout <<
" ndbmask=" <<
buf;
5391 ndbout <<
" sendcount=" << arbitRec.sendCount;
5392 ndbout <<
" recvcount=" << arbitRec.recvCount;
5393 arbitRec.recvMask.
getText(buf);
5394 ndbout <<
" recvmask=" <<
buf;
5395 ndbout <<
" code=" << arbitRec.code;
// Thread number in the signal differs from the current one: this iteration
// belongs to a superseded thread.
5398 if (signal->theData[1] != arbitRec.thread) {
// Dispatch to the handler of the current state.
5402 switch (arbitRec.state) {
5405 stateArbitInit(signal);
5409 stateArbitFind(signal);
5415 stateArbitPrep(signal);
5419 stateArbitStart(signal);
5423 stateArbitRun(signal);
5427 stateArbitChoose(signal);
5431 stateArbitCrash(signal);
// Reschedule: the first branch (presumably delay == 0 - verify) sends on JBA,
// delay 1 sends on JBB, anything else is a delayed send.
5437 signal->theData[0] = ZARBIT_HANDLING;
5438 signal->theData[1] = arbitRec.thread;
5439 signal->theData[2] = arbitRec.state;
5440 Uint32 delay = getArbitDelay();
5443 sendSignal(QMGR_REF, GSN_CONTINUEB, signal, 3, JBA);
5444 }
else if (delay == 1) {
5446 sendSignal(QMGR_REF, GSN_CONTINUEB, signal, 3, JBB);
5449 sendSignalWithDelay(QMGR_REF, GSN_CONTINUEB, signal, delay, 3);
5458 Qmgr::stateArbitInit(
Signal* signal)
// ARBIT_INIT state handler: on entry a fresh ticket is generated and the mask
// of newly added nodes is cleared. The state machine then moves straight on
// to ARBIT_FIND and runs that handler.
// CRASH_INSERTION(910 + state) is a per-state error-insert test hook.
5460 if (arbitRec.newstate) {
5462 CRASH_INSERTION((Uint32)910 + arbitRec.state);
5465 arbitRec.ticket.update();
5466 arbitRec.newMask.
clear();
5468 arbitRec.newstate =
false;
5470 arbitRec.state = ARBIT_FIND;
5471 arbitRec.newstate =
true;
5472 stateArbitFind(signal);
5485 Qmgr::stateArbitFind(
Signal* signal)
// ARBIT_FIND state handler: selects the arbitrator.
//  - METHOD_EXTERNAL: no node is searched for; the state machine goes
//    directly to ARBIT_PREP1.
//  - METHOD_DEFAULT: the configured candidates are scanned in rank order
//    (rank 1 before rank 2); the phase test below filters on ZAPI_ACTIVE, and
//    the selected node becomes arbitRec.node before ARBIT_PREP1 is entered.
5487 if (arbitRec.newstate) {
5489 CRASH_INSERTION((Uint32)910 + arbitRec.state);
5492 arbitRec.newstate =
false;
5495 switch (arbitRec.method){
5496 case ArbitRec::METHOD_EXTERNAL:
5500 arbitRec.state = ARBIT_PREP1;
5501 arbitRec.newstate =
true;
5502 stateArbitPrep(signal);
5507 case ArbitRec::METHOD_DEFAULT:
5511 for (
unsigned rank = 1; rank <= 2; rank++) {
5514 const unsigned stop = NodeBitmask::NotFound;
5515 while ((aPtr.i = arbitRec.apiMask[rank].
find(aPtr.i + 1)) != stop) {
5517 ptrAss(aPtr, nodeRec);
5518 if (aPtr.p->phase != ZAPI_ACTIVE)
5520 arbitRec.node = aPtr.i;
5521 arbitRec.state = ARBIT_PREP1;
5522 arbitRec.newstate =
true;
5523 stateArbitPrep(signal);
5542 Qmgr::stateArbitPrep(
Signal* signal)
// Handler shared by ARBIT_PREP1 and ARBIT_PREP2: distributes the chosen
// arbitrator and ticket to the other data nodes.
// On entry the set of nodes to wait for (recvMask) is computed, excluding
// this node. ARBIT_PREPREQ is sent once to each of them with code PrepPart1
// or PrepPart2 depending on the state. Then:
//  - a non-zero code restarts from ARBIT_INIT,
//  - when all replies are in, PREP1 moves to PREP2 and PREP2 moves to
//    ARBIT_START,
//  - on timeout arbitration restarts from ARBIT_INIT.
5544 if (arbitRec.newstate) {
5546 CRASH_INSERTION((Uint32)910 + arbitRec.state);
5548 arbitRec.sendCount = 0;
5549 computeArbitNdbMask(arbitRec.recvMask);
5550 arbitRec.recvMask.
clear(getOwnNodeId());
5552 arbitRec.newstate =
false;
// Requests not sent yet: send ARBIT_PREPREQ to every node in recvMask.
5554 if (! arbitRec.sendCount) {
5558 const unsigned stop = NodeBitmask::NotFound;
5559 while ((aPtr.i = arbitRec.recvMask.
find(aPtr.i + 1)) != stop) {
5561 ptrAss(aPtr, nodeRec);
5563 sd->sender = getOwnNodeId();
5564 if (arbitRec.state == ARBIT_PREP1) {
5566 sd->code = ArbitCode::PrepPart1;
5569 sd->code = ArbitCode::PrepPart2;
5571 sd->node = arbitRec.node;
5572 sd->ticket = arbitRec.ticket;
5574 sendSignal(aPtr.p->blockRef, GSN_ARBIT_PREPREQ, signal,
5575 ArbitSignalData::SignalLength, JBB);
5577 arbitRec.setTimestamp();
5578 arbitRec.sendCount = 1;
// A non-zero code (set by execARBIT_PREPCONF from a reply's code) restarts
// arbitration.
5581 if (arbitRec.code != 0) {
5583 arbitRec.state = ARBIT_INIT;
5584 arbitRec.newstate =
true;
// All nodes have answered.
5587 if (arbitRec.recvMask.
count() == 0) {
5588 if (arbitRec.state == ARBIT_PREP1) {
5590 arbitRec.state = ARBIT_PREP2;
5591 arbitRec.newstate =
true;
5594 arbitRec.state = ARBIT_START;
5595 arbitRec.newstate =
true;
5596 stateArbitStart(signal);
5600 if (arbitRec.getTimediff() > getArbitTimeout()) {
5602 arbitRec.state = ARBIT_INIT;
5603 arbitRec.newstate =
true;
5609 Qmgr::execARBIT_PREPREQ(
Signal* signal)
// Handles ARBIT_PREPREQ. The handler checks whether this node is itself
// president and whether the sender is the president.
// By request code:
//   PrepPart1 - the local ticket is cleared,
//   PrepPart2 / PrepAtrun - arbitrator node, ticket and code are adopted from
//               the signal and the local state becomes ARBIT_RUN.
// The reply ARBIT_PREPCONF, stamped with our node id as sender, goes back to
// the sender's block reference on JBB.
5613 if (getOwnNodeId() == cpresident) {
5617 if (sd->sender != cpresident) {
5622 aPtr.i = sd->sender;
5623 ptrAss(aPtr, nodeRec);
5625 case ArbitCode::PrepPart1:
5628 arbitRec.ticket.clear();
5630 case ArbitCode::PrepPart2:
5632 case ArbitCode::PrepAtrun:
5634 arbitRec.node = sd->node;
5635 arbitRec.ticket = sd->ticket;
5636 arbitRec.code = sd->code;
5638 arbitRec.state = ARBIT_RUN;
5639 arbitRec.newstate =
true;
5640 if (sd->code == ArbitCode::PrepAtrun) {
5649 sd->sender = getOwnNodeId();
5651 sendSignal(aPtr.p->blockRef, GSN_ARBIT_PREPCONF, signal,
5652 ArbitSignalData::SignalLength, JBB);
5656 Qmgr::execARBIT_PREPCONF(
Signal* signal)
// Handles ARBIT_PREPCONF (also used for PREPREF). The reply is checked
// against the current arbitration record (arbitRec.match), against the state
// (ARBIT_PREP1 or ARBIT_PREP2) and against the set of nodes still awaited
// (recvMask). The sender is then removed from recvMask and the first non-zero
// code received is kept in arbitRec.code.
5660 if (! arbitRec.match(sd)) {
5664 if (arbitRec.state != ARBIT_PREP1 && arbitRec.state != ARBIT_PREP2) {
5668 if (! arbitRec.recvMask.
get(sd->sender)) {
5672 arbitRec.recvMask.
clear(sd->sender);
5673 if (arbitRec.code == 0 && sd->code != 0) {
5675 arbitRec.code = sd->code;
5680 Qmgr::execARBIT_PREPREF(
Signal* signal)
// Handles ARBIT_PREPREF by reusing the PREPCONF handler. A refusal without an
// error code is given ErrUnknown first, so the code seen by the CONF handler
// is never zero.
5684 if (sd->code == 0) {
5686 sd->code = ArbitCode::ErrUnknown;
5688 execARBIT_PREPCONF(signal);
5696 Qmgr::stateArbitStart(
Signal* signal)
// ARBIT_START state handler: activates the chosen arbitrator.
//  - METHOD_EXTERNAL: no arbitrator node is involved (node must be 0); the
//    state goes directly to ARBIT_RUN.
//  - METHOD_DEFAULT: ARBIT_STARTREQ is sent once to the arbitrator's cluster
//    manager block. A reply with code ApiStart moves the state to ARBIT_RUN;
//    the other reply path and a timeout (code ErrTimeout) restart from
//    ARBIT_INIT.
5698 if (arbitRec.newstate) {
5700 CRASH_INSERTION((Uint32)910 + arbitRec.state);
5702 arbitRec.sendCount = 0;
5703 arbitRec.recvCount = 0;
5705 arbitRec.newstate =
false;
5708 switch (arbitRec.method){
5709 case ArbitRec::METHOD_EXTERNAL:
5711 ndbrequire(arbitRec.node == 0);
5714 arbitRec.state = ARBIT_RUN;
5715 arbitRec.newstate =
true;
5719 case ArbitRec::METHOD_DEFAULT:
// Request not sent yet: send it and start the timeout clock.
5720 if (! arbitRec.sendCount) {
5722 BlockReference blockRef = calcApiClusterMgrBlockRef(arbitRec.node);
5724 sd->sender = getOwnNodeId();
5726 sd->node = arbitRec.node;
5727 sd->ticket = arbitRec.ticket;
5729 sendSignal(blockRef, GSN_ARBIT_STARTREQ, signal,
5730 ArbitSignalData::SignalLength, JBB);
5731 arbitRec.sendCount = 1;
5732 arbitRec.setTimestamp();
// A reply has arrived (recorded by execARBIT_STARTCONF).
5735 if (arbitRec.recvCount) {
5738 if (arbitRec.code == ArbitCode::ApiStart) {
5740 arbitRec.state = ARBIT_RUN;
5741 arbitRec.newstate =
true;
5744 arbitRec.state = ARBIT_INIT;
5745 arbitRec.newstate =
true;
5748 if (arbitRec.getTimediff() > getArbitTimeout()) {
5750 arbitRec.code = ArbitCode::ErrTimeout;
5752 arbitRec.state = ARBIT_INIT;
5753 arbitRec.newstate =
true;
5765 Qmgr::execARBIT_STARTCONF(
Signal* signal)
// Handles ARBIT_STARTCONF (also used for STARTREF). The reply is checked
// against the current arbitration record, against the state (ARBIT_START) and
// against whether a reply was already recorded. The reply's code is then
// stored and recvCount set to 1 for stateArbitStart() to act on.
5769 if (! arbitRec.match(sd)) {
5773 if (arbitRec.state != ARBIT_START) {
5777 if (arbitRec.recvCount) {
5781 arbitRec.code = sd->code;
5782 arbitRec.recvCount = 1;
5786 Qmgr::execARBIT_STARTREF(
Signal* signal)
// Handles ARBIT_STARTREF by reusing the STARTCONF handler. A refusal without
// an error code is given ErrUnknown first.
5790 if (sd->code == 0) {
5792 sd->code = ArbitCode::ErrUnknown;
5794 execARBIT_STARTCONF(signal);
5802 Qmgr::stateArbitRun(
Signal* signal)
// ARBIT_RUN state handler: arbitrator is up. Each node recorded in
// arbitRec.newMask (nodes added while running) is removed from the mask and
// sent ARBIT_PREPREQ with code PrepAtrun carrying the current arbitrator node
// and ticket.
5804 if (arbitRec.newstate) {
5806 CRASH_INSERTION((Uint32)910 + arbitRec.state);
5809 arbitRec.newstate =
false;
5813 const unsigned stop = NodeBitmask::NotFound;
5814 while ((aPtr.i = arbitRec.newMask.
find(aPtr.i + 1)) != stop) {
5816 arbitRec.newMask.
clear(aPtr.i);
5817 ptrAss(aPtr, nodeRec);
5819 sd->sender = getOwnNodeId();
5820 sd->code = ArbitCode::PrepAtrun;
5821 sd->node = arbitRec.node;
5822 sd->ticket = arbitRec.ticket;
5824 sendSignal(aPtr.p->blockRef, GSN_ARBIT_PREPREQ, signal,
5825 ArbitSignalData::SignalLength, JBB);
5836 Qmgr::stateArbitChoose(
Signal* signal)
// ARBIT_CHOOSE state handler: resolves a possible network partitioning.
//  - METHOD_EXTERNAL: no arbitrator node is contacted (node must be 0). After
//    the arbitration timeout has passed the node considers itself the winner
//    (WinWaitExternal), commits the failure and restarts from ARBIT_INIT.
//  - METHOD_DEFAULT: ARBIT_CHOOSEREQ with the mask of live data nodes is sent
//    once to the arbitrator. Reply WinChoose commits the failure and restarts
//    from ARBIT_INIT; the other reply path and a timeout (code ErrTimeout)
//    enter ARBIT_CRASH.
5838 if (arbitRec.newstate) {
5840 CRASH_INSERTION((Uint32)910 + arbitRec.state);
5842 arbitRec.sendCount = 0;
5843 arbitRec.recvCount = 0;
5845 arbitRec.newstate =
false;
5848 switch(arbitRec.method){
5849 case ArbitRec::METHOD_EXTERNAL:
// First pass only starts the clock.
5851 if (! arbitRec.sendCount) {
5853 ndbrequire(arbitRec.node == 0);
5855 arbitRec.sendCount = 1;
5856 arbitRec.setTimestamp();
5860 if (arbitRec.getTimediff() > getArbitTimeout()) {
5863 ndbrequire(arbitRec.node == 0);
5866 computeArbitNdbMask(nodes);
5867 arbitRec.code = ArbitCode::WinWaitExternal;
5870 sendCommitFailReq(signal);
5871 arbitRec.state = ARBIT_INIT;
5872 arbitRec.newstate =
true;
5878 case ArbitRec::METHOD_DEFAULT:
5880 if (! arbitRec.sendCount) {
5882 const BlockReference blockRef = calcApiClusterMgrBlockRef(arbitRec.node);
5884 sd->sender = getOwnNodeId();
5886 sd->node = arbitRec.node;
5887 sd->ticket = arbitRec.ticket;
5888 computeArbitNdbMask(sd->mask);
5889 sendSignal(blockRef, GSN_ARBIT_CHOOSEREQ, signal,
5890 ArbitSignalData::SignalLength, JBA);
5891 arbitRec.sendCount = 1;
5892 arbitRec.setTimestamp();
// A reply has arrived (recorded by execARBIT_CHOOSECONF).
5896 if (arbitRec.recvCount) {
5899 if (arbitRec.code == ArbitCode::WinChoose) {
5901 sendCommitFailReq(signal);
5902 arbitRec.state = ARBIT_INIT;
5903 arbitRec.newstate =
true;
5906 arbitRec.state = ARBIT_CRASH;
5907 arbitRec.newstate =
true;
5908 stateArbitCrash(signal);
5912 if (arbitRec.getTimediff() > getArbitTimeout()) {
5915 arbitRec.code = ArbitCode::ErrTimeout;
5917 arbitRec.state = ARBIT_CRASH;
5918 arbitRec.newstate =
true;
5919 stateArbitCrash(signal);
5932 Qmgr::execARBIT_CHOOSECONF(
Signal* signal)
// Handles ARBIT_CHOOSECONF (also used for CHOOSEREF). The reply is checked
// against the current arbitration record, against the state (ARBIT_CHOOSE)
// and against whether a reply was already recorded. recvCount is then set to
// 1 and the reply's code stored for stateArbitChoose() to act on.
5936 if (!arbitRec.match(sd)) {
5940 if (arbitRec.state != ARBIT_CHOOSE) {
5944 if (arbitRec.recvCount) {
5948 arbitRec.recvCount = 1;
5949 arbitRec.code = sd->code;
5953 Qmgr::execARBIT_CHOOSEREF(
Signal* signal)
// Handles ARBIT_CHOOSEREF by reusing the CHOOSECONF handler. A refusal
// without an error code is given ErrUnknown first.
5957 if (sd->code == 0) {
5959 sd->code = ArbitCode::ErrUnknown;
5961 execARBIT_CHOOSECONF(signal);
5969 Qmgr::stateArbitCrash(
Signal* signal)
// ARBIT_CRASH state handler: arbitration was lost, so the node is shut down
// via progError with NDBD_EXIT_ARBIT_SHUTDOWN. On entry the timestamp is set.
// When ndb_arbit_crash_wait_for_event_report_to_get_out is defined, the
// elapsed time is first compared with the arbitration timeout (presumably to
// delay the shutdown until the timeout has passed - verify).
5972 if (arbitRec.newstate) {
5974 CRASH_INSERTION((Uint32)910 + arbitRec.state);
5975 arbitRec.setTimestamp();
5977 arbitRec.newstate =
false;
5979 #ifdef ndb_arbit_crash_wait_for_event_report_to_get_out
5980 if (! (arbitRec.getTimediff() > getArbitTimeout()))
5983 CRASH_INSERTION(932);
5984 CRASH_INSERTION(938);
5985 progError(__LINE__, NDBD_EXIT_ARBIT_SHUTDOWN,
5986 "Arbitrator decided to shutdown this node");
// Handles ARBIT_STOPREP: records ApiExit as the arbitration code and runs
// the arbitrator-failure handling for the current arbitrator node.
5995 Qmgr::execARBIT_STOPREP(
Signal* signal)
// Signal does not match arbitRec; the guarded statement is not visible here
// (presumably the report is ignored - confirm).
5999 if (! arbitRec.match(sd)) {
6003 arbitRec.code = ArbitCode::ApiExit;
6004 handleArbitApiFail(signal, arbitRec.node);
// NOTE(review): the function headers for the statements below are not
// visible in this excerpt; the comments describe only the visible lines.
// Two loops over node records 1 .. MAX_NDB_NODES-1 (loop bodies beyond the
// ptrAss call are not shown).
6012 for (aPtr.i = 1; aPtr.i < MAX_NDB_NODES; aPtr.i++) {
6014 ptrAss(aPtr, nodeRec);
6027 for (aPtr.i = 1; aPtr.i < MAX_NDB_NODES; aPtr.i++) {
6029 ptrAss(aPtr, nodeRec);
// Event report to CMVMI: the arbitration state is shifted left by 16 bits
// and OR-ed with the arbitration code to form sd->code.
6047 sd->code = arbitRec.code | (arbitRec.state << 16);
6048 sd->node = arbitRec.node;
6049 sd->ticket = arbitRec.ticket;
6051 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal,
6052 ArbitSignalData::SignalLength, JBB);
// Handles DUMP_STATE_ORD. signal->theData[0] is the dump code; further
// words are code-specific arguments.
6058 Qmgr::execDUMP_STATE_ORD(
Signal* signal)
// The case label(s) of this switch are not visible in this excerpt.
6060 switch (signal->theData[0]) {
// General state dump through infoEvent.
6062 infoEvent(
"creadyDistCom = %d, cpresident = %d\n",
6063 creadyDistCom, cpresident);
6064 infoEvent(
"cpresidentAlive = %d, cpresidentCand = %d (gci: %d)\n",
6066 c_start.m_president_candidate,
6067 c_start.m_president_candidate_gci);
6068 infoEvent(
"ctoStatus = %d\n", ctoStatus);
// One line per node 1 .. MAX_NDB_NODES-1: node id plus the symbolic name
// and numeric value of its phase.
6069 for(Uint32 i = 1; i<MAX_NDB_NODES; i++){
6072 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
6074 switch(nodePtr.p->phase){
6076 sprintf(buf,
"Node %d: ZINIT(%d)", i, nodePtr.p->phase);
6079 sprintf(buf,
"Node %d: ZSTARTING(%d)", i, nodePtr.p->phase);
6082 sprintf(buf,
"Node %d: ZRUNNING(%d)", i, nodePtr.p->phase);
6085 sprintf(buf,
"Node %d: ZPREPARE_FAIL(%d)", i, nodePtr.p->phase);
6088 sprintf(buf,
"Node %d: ZFAIL_CLOSING(%d)", i, nodePtr.p->phase);
6091 sprintf(buf,
"Node %d: ZAPI_INACTIVE(%d)", i, nodePtr.p->phase);
6094 sprintf(buf,
"Node %d: ZAPI_ACTIVE(%d)", i, nodePtr.p->phase);
6097 sprintf(buf,
"Node %d: <UNKNOWN>(%d)", i, nodePtr.p->phase);
// Dump code 935 with one argument: arm error insert 935 and keep the
// argument in c_error_insert_extra.
6105 if (signal->theData[0] == 935 && signal->getLength() == 2)
6107 SET_ERROR_INSERT_VALUE(935);
6108 c_error_insert_extra = signal->theData[1];
// Dump code 900 with one argument: run api_failed for the given node id.
6112 if (signal->theData[0] == 900 && signal->getLength() == 2)
6114 ndbout_c(
"disconnecting %u", signal->theData[1]);
6115 api_failed(signal, signal->theData[1]);
// Dump code 908: build a one-line summary in buf by repeated appends and
// print it. The optional second word is echoed as a tag (-1 when absent).
// NOTE(review): buf's declaration/size is not visible here; the appends
// are unbounded sprintf calls.
6118 if (signal->theData[0] == 908)
6120 int tag = signal->getLength() < 2 ? -1 : signal->theData[1];
6125 sprintf(buf+strlen(buf),
"%d:", tag);
6126 sprintf(buf+strlen(buf),
" pres:%u", cpresident);
6127 sprintf(buf+strlen(buf),
" own:%u", getOwnNodeId());
6128 NodeRecPtr myNodePtr;
6129 myNodePtr.i = getOwnNodeId();
6130 ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
// ndynamicId is printed as two parts: low 16 bits, then the bits above.
6131 sprintf(buf+strlen(buf),
" dyn:%u-%u", myNodePtr.p->ndynamicId & 0xFFFF, myNodePtr.p->ndynamicId >> 16);
6132 sprintf(buf+strlen(buf),
" mxdyn:%u", c_maxDynamicId);
6133 sprintf(buf+strlen(buf),
" hb:%u->%u->%u", cneighbourl, getOwnNodeId(), cneighbourh);
6134 sprintf(buf+strlen(buf),
" node:dyn-hi,cfg:");
// Per-node entries (any filter between ptrAss and the sprintf is not
// visible in this excerpt).
6136 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++)
6138 ptrAss(nodePtr, nodeRec);
6142 sprintf(buf+strlen(buf),
" %u:%u-%u,%u", nodePtr.i, nodePtr.p->ndynamicId & 0xFFFF, nodePtr.p->ndynamicId >> 16, nodePtr.p->hbOrder);
6145 ndbout << buf << endl;
// Dump code 9992 (and further codes on lines not visible here): with one
// argument, a node id in theData[1] may be remapped to the left neighbour,
// right neighbour or president (the selecting conditions are not visible).
6149 Uint32 dumpCode = signal->theData[0];
6150 if ((dumpCode == 9992) ||
6153 if (signal->getLength() == 2)
// nodeId is a copy of the requested id; newNodeId aliases theData[1], so
// assigning to it rewrites the signal in place.
6155 Uint32 nodeId = signal->theData[1];
6156 Uint32& newNodeId = signal->theData[1];
6158 assert(257 > MAX_NODES);
6159 if (nodeId > MAX_NODES)
6161 const char* type =
"None";
6167 newNodeId = cneighbourl;
6168 type =
"Left neighbour";
6174 newNodeId = cneighbourh;
6175 type =
"Right neighbour";
6181 newNodeId = cpresident;
6186 ndbout_c(
"QMGR : Mapping request on node id %u to node id %u (%s)",
6187 nodeId, newNodeId, type);
// Only when the id actually changed is the (rewritten) dump order sent on
// to CMVMI.
6188 if (newNodeId != nodeId)
6190 sendSignal(CMVMI_REF, GSN_DUMP_STATE_ORD, signal, length, JBB);
// Dump code 9994: pass theData[1] to setCCDelay and enable the
// connectivity check.
6196 if (dumpCode == 9994)
6198 ndbout_c(
"setCCDelay(%u)", signal->theData[1]);
6199 setCCDelay(signal->theData[1]);
6200 m_connectivity_check.m_enabled =
true;
// Handles API_BROADCAST_REP: strips the ApiBroadcastRep header words from
// the signal and sends the remaining payload, as signal api.gsn, to a
// receiver group (rg) built on lines not visible in this excerpt.
6207 Qmgr::execAPI_BROADCAST_REP(
Signal* signal)
// Payload length = total length minus the fixed header; the payload is
// moved to the start of theData.
6213 Uint32 len = signal->getLength() - ApiBroadcastRep::SignalLength;
6214 memmove(signal->theData, signal->theData+ApiBroadcastRep::SignalLength,
// Collect receivers: nodes in phase ZAPI_ACTIVE (the rest of the condition
// is not visible). Note the loop bound is MAX_NODES, not MAX_NDB_NODES.
6219 for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++)
6222 ptrAss(nodePtr, nodeRec);
6223 if (nodePtr.p->phase == ZAPI_ACTIVE &&
6227 mask.
set(nodePtr.i);
// NOTE(review): the condition under which sections are released instead of
// being forwarded is not visible here.
6234 releaseSections(handle);
6239 sendSignal(rg, api.gsn, signal, len, JBB,
// Handles NODE_FAILREP by forwarding it to the signal counter manager.
6244 Qmgr::execNODE_FAILREP(
Signal * signal)
6249 c_counterMgr.execNODE_FAILREP(signal);
// Handles ALLOC_NODEID_REQ. Two paths are visible: a request whose sender
// block is not QMGR (validated here, then sent on to a receiver group), and
// a request coming from a QMGR block (validated locally and answered with
// CONF or REF).
6253 Qmgr::execALLOC_NODEID_REQ(
Signal * signal)
6260 nodePtr.i = req.nodeId;
6261 ptrAss(nodePtr, nodeRec);
// --- Path 1: sender is not a QMGR block ---
6263 if (refToBlock(req.senderRef) != QMGR)
// Rejection checks, in order: this node is not president, a previous
// allocation is still being tracked, the node id is already connected,
// or the node id already has a non-zero secret (reserved).
6267 if (getOwnNodeId() != cpresident)
6270 error = AllocNodeIdRef::NotMaster;
6272 else if (!opAllocNodeIdReq.m_tracker.done())
6275 error = AllocNodeIdRef::Busy;
6277 else if (c_connectedNodes.
get(req.nodeId))
6280 error = AllocNodeIdRef::NodeConnected;
6282 else if (nodePtr.p->m_secret != 0)
6285 error = AllocNodeIdRef::NodeReserved;
// Rejected: reply with ALLOC_NODEID_REF (masterRef names the president's
// QMGR so the sender can retry there).
6292 ref->senderRef = reference();
6293 ref->errorCode = error;
6294 ref->masterRef = numberToRef(QMGR, cpresident);
6295 ref->senderData = req.senderData;
6296 ref->nodeId = req.nodeId;
6297 sendSignal(req.senderRef, GSN_ALLOC_NODEID_REF, signal,
6298 AllocNodeIdRef::SignalLength, JBB);
6302 if (ERROR_INSERTED(934) && req.nodeId != getOwnNodeId())
6304 CRASH_INSERTION(934);
// Build a 64-bit secret from the current millisecond clock and own node id:
// hi = now >> 24, lo = (now << 8) truncated to 32 bits, plus own node id.
6310 Uint64 now = NdbTick_CurrentMillisecond();
6311 Uint32 secret_hi = Uint32(now >> 24);
6312 Uint32 secret_lo = Uint32(now << 8) + getOwnNodeId();
6313 req.secret_hi = secret_hi;
6314 req.secret_lo = secret_lo;
// Cap the requested timeout at 60000 (added to a millisecond clock below).
6316 if (req.timeout > 60000)
6317 req.timeout = 60000;
// Reserve the node id: store the secret and the expiry time in its record.
6319 nodePtr.p->m_secret = (Uint64(secret_hi) << 32) + secret_lo;
6320 nodePtr.p->m_alloc_timeout = now + req.timeout;
// Remember the request so replies can be matched in the CONF/REF handlers.
6322 opAllocNodeIdReq.m_req = req;
6323 opAllocNodeIdReq.m_error = 0;
6324 opAllocNodeIdReq.m_connectCount =
6330 req2->senderRef = reference();
// Arm the reply tracker for receiver group rg and send the request to it.
6333 p.init<
AllocNodeIdRef>(c_counterMgr, rg, GSN_ALLOC_NODEID_REF, 0);
6335 sendSignal(rg, GSN_ALLOC_NODEID_REQ, signal,
6336 AllocNodeIdReq::SignalLengthQMGR, JBB);
// --- Path 2: sender is a QMGR block ---
// Local checks: already connected, node type mismatch (condition not
// visible), failure handling not completed, or already reserved.
6341 if (c_connectedNodes.
get(req.nodeId))
6344 error = AllocNodeIdRef::NodeConnected;
6349 error = AllocNodeIdRef::NodeTypeMismatch;
6351 else if (nodePtr.p->failState != NORMAL)
6354 error = AllocNodeIdRef::NodeFailureHandlingNotCompleted;
6362 else if (nodePtr.p->m_secret != 0)
6365 error = AllocNodeIdRef::NodeReserved;
// Rejected: ALLOC_NODEID_REF back to the sender.
6373 ref->senderRef = reference();
6374 ref->errorCode = error;
6375 ref->senderData = req.senderData;
6376 ref->nodeId = req.nodeId;
6377 ref->masterRef = numberToRef(QMGR, cpresident);
6378 sendSignal(req.senderRef, GSN_ALLOC_NODEID_REF, signal,
6379 AllocNodeIdRef::SignalLength, JBB);
// Accepted: echo the secret back in ALLOC_NODEID_CONF.
6384 conf->senderRef = reference();
6385 conf->secret_hi = req.secret_hi;
6386 conf->secret_lo = req.secret_lo;
6387 sendSignal(req.senderRef, GSN_ALLOC_NODEID_CONF, signal,
6388 AllocNodeIdConf::SignalLength, JBB);
// Handles ALLOC_NODEID_CONF: registers the reply with the tracker and
// verifies the echoed secret against the pending request.
6392 Qmgr::execALLOC_NODEID_CONF(
Signal * signal)
6398 opAllocNodeIdReq.m_tracker.reportConf(c_counterMgr,
6399 refToNode(conf->senderRef));
// The secret is only compared when the signal is long enough to carry it.
6401 if (signal->getLength() >= AllocNodeIdConf::SignalLength)
// A mismatching secret is recorded as error Undefined, unless an earlier
// error is already stored (first error wins).
6404 if (opAllocNodeIdReq.m_req.secret_hi != conf->secret_hi ||
6405 opAllocNodeIdReq.m_req.secret_lo != conf->secret_lo)
6408 if (opAllocNodeIdReq.m_error == 0)
6411 opAllocNodeIdReq.m_error = AllocNodeIdRef::Undefined;
6416 completeAllocNodeIdReq(signal);
// Handles ALLOC_NODEID_REF: registers the refusal with the tracker and
// keeps the first real error code.
6421 Qmgr::execALLOC_NODEID_REF(
Signal * signal)
// NF_FakeErrorREF is registered through ignoreRef rather than reportRef.
6427 if (ref->errorCode == AllocNodeIdRef::NF_FakeErrorREF)
6430 opAllocNodeIdReq.m_tracker.ignoreRef(c_counterMgr,
6431 refToNode(ref->senderRef));
// Any other error code: count it as a REF and store it if no error has
// been stored yet (presumably the else-branch; the `else` is not visible).
6436 opAllocNodeIdReq.m_tracker.reportRef(c_counterMgr,
6437 refToNode(ref->senderRef));
6438 if (opAllocNodeIdReq.m_error == 0)
6441 opAllocNodeIdReq.m_error = ref->errorCode;
6444 completeAllocNodeIdReq(signal);
// Finishes a pending node id allocation: answers the original requester
// with ALLOC_NODEID_REF or ALLOC_NODEID_CONF.
6448 Qmgr::completeAllocNodeIdReq(
Signal *signal)
// Replies are still outstanding (guarded statement not visible; presumably
// an early return - confirm).
6452 if (!opAllocNodeIdReq.m_tracker.done())
// NOTE(review): compares the connect count saved with the request against
// a value on a line not visible here; the consequence is not shown either.
6458 if (opAllocNodeIdReq.m_connectCount !=
// At least one participant refused: release the reservation (secret = 0)
// and report the stored error to the requester.
6466 if (opAllocNodeIdReq.m_tracker.hasRef())
6475 nodePtr.i = opAllocNodeIdReq.m_req.nodeId;
6476 ptrAss(nodePtr, nodeRec);
6477 nodePtr.p->m_secret = 0;
6481 ref->senderRef = reference();
6482 ref->senderData = opAllocNodeIdReq.m_req.senderData;
6483 ref->nodeId = opAllocNodeIdReq.m_req.nodeId;
6484 ref->errorCode = opAllocNodeIdReq.m_error;
6485 ref->masterRef = numberToRef(QMGR, cpresident);
6486 ndbassert(AllocNodeIdRef::SignalLength == 5);
6487 sendSignal(opAllocNodeIdReq.m_req.senderRef, GSN_ALLOC_NODEID_REF, signal,
6488 AllocNodeIdRef::SignalLength, JBB);
// Success: confirm to the requester, including the secret generated for
// this allocation.
6495 conf->senderRef = reference();
6496 conf->senderData = opAllocNodeIdReq.m_req.senderData;
6497 conf->nodeId = opAllocNodeIdReq.m_req.nodeId;
6498 conf->secret_lo = opAllocNodeIdReq.m_req.secret_lo;
6499 conf->secret_hi = opAllocNodeIdReq.m_req.secret_hi;
6500 sendSignal(opAllocNodeIdReq.m_req.senderRef, GSN_ALLOC_NODEID_CONF, signal,
6501 AllocNodeIdConf::SignalLength, JBB);
// Handles STOP_REQ: keeps a copy of the request in c_stopReq and, when a
// sender reference is set, acknowledges with STOP_CONF.
6505 Qmgr::execSTOP_REQ(
Signal* signal)
6508 c_stopReq = * (
StopReq*)signal->getDataPtr();
6510 if (c_stopReq.senderRef)
// Note: the conf field named nodeState is filled with this node's id.
6516 conf->senderData = c_stopReq.senderData;
6517 conf->nodeState = getOwnNodeId();
6518 sendSignal(c_stopReq.senderRef,
6519 GSN_STOP_CONF, signal, StopConf::SignalLength, JBA);
// Acts on a stored stop request (c_stopReq): sends START_ORD or STOP_ORD to
// CMVMI. The second half of the entry condition is not visible here.
6524 Qmgr::check_multi_node_shutdown(
Signal* signal)
6526 if (c_stopReq.senderRef &&
// Restart requested in the stop request: START_ORD (2 words).
6530 if(StopReq::getPerformRestart(c_stopReq.requestInfo))
6535 sendSignal(CMVMI_REF, GSN_START_ORD, signal, 2, JBA);
// Presumably the else-branch (plain stop): STOP_ORD (1 word).
6537 sendSignal(CMVMI_REF, GSN_STOP_ORD, signal, 1, JBA);
// Decides whether the configured per-node hbOrder values are used, storing
// the result in m_hb_order_config_used (false until all checks pass).
6545 Qmgr::check_hb_order_config()
6547 m_hb_order_config_used =
false;
// First pass: count the nodes considered and how many have hbOrder == 0
// (the node filter and the counter increments are not visible here).
6549 Uint32 count_zero = 0;
6551 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++)
6553 ptrAss(nodePtr, nodeRec);
6558 if (nodePtr.p->hbOrder == 0)
6562 ndbrequire(count != 0);
// All values zero, or only some values zero: handled by branches whose
// bodies are not visible (presumably leaving the flag false - confirm).
6563 if (count_zero == count)
6568 if (count_zero != 0)
// Second pass: pairwise comparison looking for two nodes with the same
// hbOrder. nodePtr2 starts at nodePtr.i + 1, so the index inequality test
// below always holds.
6573 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++)
6575 ptrAss(nodePtr, nodeRec);
6579 NodeRecPtr nodePtr2;
6580 for (nodePtr2.i = nodePtr.i + 1; nodePtr2.i < MAX_NDB_NODES; nodePtr2.i++)
6582 ptrAss(nodePtr2, nodeRec);
6586 if (nodePtr.i != nodePtr2.i &&
6587 nodePtr.p->hbOrder == nodePtr2.p->hbOrder)
// Reached when none of the earlier checks bailed out.
6596 m_hb_order_config_used =
true;
// Tick thresholds for a connectivity check round, compared against
// m_connectivity_check.m_tick: at CC_SuspectTicks the nodes still waiting
// are added to the suspect set; at CC_FailedTicks the round is ended and
// the nodes still waiting are reported as failed.
6600 static const Uint32 CC_SuspectTicks = 1;
6601 static const Uint32 CC_FailedTicks = 2;
// Starts a connectivity check round: sends NODE_PINGREQ to every node in
// phase ZRUNNING, initialises the round's bookkeeping bitmasks, logs the
// start and reports it to CMVMI as an event.
//   signal      - signal buffer reused for the outgoing signals
//   reason      - FailRep cause that triggered the check; the restart from
//                 connectivityCheckCompleted passes 0
//   causingNode - node id associated with the trigger (0 for a restart)
// Only part of the body is visible in this excerpt.
6604 Qmgr::startConnectivityCheck(
Signal* signal, Uint32 reason, Uint32 causingNode)
6607 ndbrequire(m_connectivity_check.getEnabled());
// A round is already running (guarded statements not visible; presumably
// the new request is absorbed into the running round - confirm).
6609 if (m_connectivity_check.m_active)
6619 m_connectivity_check.m_nodesPinged.
clear();
// Each round gets a new round number, carried in senderData so that late
// replies from older rounds can be recognised.
6626 Uint32 ownId = getOwnNodeId();
6628 pingReq->senderData = ++m_connectivity_check.m_currentRound;
6629 pingReq->senderRef = reference();
// Ping every running node and remember which ones were pinged.
6631 for (Uint32 i=1; i < MAX_NDB_NODES; i++)
6635 NodeRec& node = nodeRec[
i];
6636 if (node.phase == ZRUNNING)
6642 sendSignal(node.blockRef,
6645 NodePingReq::SignalLength,
6648 m_connectivity_check.m_nodesPinged.
set(i);
// All pinged nodes are awaited; none has failed during this round yet.
6654 m_connectivity_check.m_nodesWaiting.
assign(m_connectivity_check.m_nodesPinged);
6655 m_connectivity_check.m_nodesFailedDuring.
clear();
// Suspects carried over from a previous round are kept only if they were
// pinged again in this round.
6658 m_connectivity_check.m_nodesSuspect.
bitAND(m_connectivity_check.m_nodesPinged);
// Translate the reason into log text. The switch header and its remaining
// labels are not visible; presumably a restart clears firstTime and
// requires at least one suspect node.
6660 const char* reasonText =
"Unknown";
6661 bool firstTime =
true;
6665 case FailRep::ZHEARTBEAT_FAILURE:
6666 reasonText =
"Heartbeat failure";
6668 case FailRep::ZCONNECT_CHECK_FAILURE:
6669 reasonText =
"Connectivity check request";
6673 ndbrequire(m_connectivity_check.m_nodesSuspect.
count() > 0);
// At least one node was pinged: log, report the event and arm the round.
6677 if (!m_connectivity_check.m_nodesPinged.
isclear())
6682 m_connectivity_check.m_nodesPinged.
getText(buff);
6685 g_eventLogger->
info(
"QMGR : Starting connectivity check of %u other nodes (%s) due to %s from node %u.",
6686 m_connectivity_check.m_nodesPinged.
count(),
6694 m_connectivity_check.m_nodesSuspect.
getText(buff2);
// Log text fixed: "syspect" -> "suspect".
6695 g_eventLogger->
info(
"QMGR : Restarting connectivity check of %u other nodes (%s) due to %u suspect nodes (%s)",
6696 m_connectivity_check.m_nodesPinged.
count(),
6698 m_connectivity_check.m_nodesSuspect.
count(),
// Event report layout: [1] pinged count, [2] reason, [3] causing node,
// [4] bitmask size in words, then the pinged mask followed by the suspect
// mask (theData[0] is set on a line not visible here).
6704 Uint32 bitmaskSz = NdbNodeBitmask::Size;
6706 signal->theData[1] = m_connectivity_check.m_nodesPinged.
count();
6707 signal->theData[2] = reason;
6708 signal->theData[3] = causingNode;
6709 signal->theData[4] = bitmaskSz;
6710 Uint32* sigPtr = &signal->theData[5];
6711 m_connectivity_check.m_nodesPinged.
copyto(bitmaskSz, sigPtr); sigPtr+= bitmaskSz;
6712 m_connectivity_check.m_nodesSuspect.
copyto(bitmaskSz, sigPtr);
6713 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 5 + (2 * bitmaskSz), JBB);
// Mark the round active and restart its tick counter and timer.
6715 m_connectivity_check.m_active =
true;
6716 m_connectivity_check.m_tick = 0;
6717 NDB_TICKS now = NdbTick_CurrentMillisecond();
6718 m_connectivity_check.m_timer.
reset(now);
// No running node was pinged: only log that the check was not started.
6722 g_eventLogger->
info(
"QMGR : Connectivity check requested due to %s (from %u) not started as no other running nodes.",
// Handles NODE_PINGREQ from another node: optionally starts a local
// connectivity check and answers with NODE_PINGCONF carrying the sender's
// senderData.
6729 Qmgr::execNODE_PINGREQ(
Signal* signal)
6732 Uint32 ownId = getOwnNodeId();
6734 Uint32 sendersRef = signal->getSendersBlockRef();
6735 Uint32 sendersNodeId = refToNode(sendersRef);
6736 Uint32 senderData = pingReq->senderData;
// A node never pings itself.
6738 ndbrequire(sendersNodeId != ownId);
6746 if (likely(m_connectivity_check.getEnabled()))
// No local round is active: a ping from a peer triggers one here too,
// unless the peer is not in phase ZRUNNING.
6750 if (! m_connectivity_check.m_active)
6759 nodePtr.i = sendersNodeId;
6760 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
6761 if (unlikely(nodePtr.p->phase != ZRUNNING))
6765 g_eventLogger->
warning(
"QMGR : Discarding NODE_PINGREQ from non-running node %u (%u)",
6766 sendersNodeId, nodePtr.p->phase);
6772 startConnectivityCheck(signal, FailRep::ZCONNECT_CHECK_FAILURE, sendersNodeId);
// Connectivity checking is not enabled locally: warn about the likely
// configuration mismatch.
6778 g_eventLogger->
warning(
"QMGR : NODE_PINGREQ received from node %u, but connectivity "
6779 "checking not configured on this node. Ensure all "
6780 "nodes have the same configuration for parameter "
6781 "ConnectCheckIntervalMillis.",
// Reply to the sender, echoing its senderData.
6788 pingConf->senderData = senderData;
6789 pingConf->senderRef = reference();
6791 sendSignal(sendersRef,
6794 NodePingConf::SignalLength,
// Called with the id of a node that connected: removes it from the suspect
// set.
6799 Qmgr::ConnectCheckRec::reportNodeConnect(Uint32 nodeId)
6802 m_nodesSuspect.
clear(nodeId);
// Called with the id of a node that failed. While a round is active the
// node is recorded in m_nodesFailedDuring; if a reply from it was still
// awaited, it is removed from the waiting set and the visible return value
// is whether the waiting set is now empty. Other return paths are not
// visible in this excerpt.
6806 Qmgr::ConnectCheckRec::reportNodeFailure(Uint32 nodeId)
6808 if (unlikely(m_active))
6810 m_nodesFailedDuring.set(nodeId);
6812 if (m_nodesWaiting.get(nodeId))
6817 m_nodesWaiting.clear(nodeId);
6819 return m_nodesWaiting.isclear();
// Handles NODE_PINGCONF: a peer's answer to a ping sent by
// startConnectivityCheck. Updates the waiting/suspect sets and completes
// the round once nobody is awaited.
6826 Qmgr::execNODE_PINGCONF(
Signal* signal)
6830 ndbrequire(m_connectivity_check.getEnabled());
6833 Uint32 sendersBlockRef = signal->getSendersBlockRef();
6834 Uint32 sendersNodeId = refToNode(sendersBlockRef);
6835 Uint32 roundNumber = pingConf->senderData;
// Sanity: not from ourselves, and either a round is active or the sender
// is known from the waiting / failed-during sets.
6837 ndbrequire(sendersNodeId != getOwnNodeId());
6838 ndbrequire((m_connectivity_check.m_active) ||
6839 (m_connectivity_check.m_nodesWaiting.
get(sendersNodeId) ||
6840 m_connectivity_check.m_nodesFailedDuring.
get(sendersNodeId)));
// Reply arrives while no round is active or for a different round number:
// log and discard.
6842 if (unlikely((! m_connectivity_check.m_active) ||
6843 (roundNumber != m_connectivity_check.m_currentRound)))
6845 g_eventLogger->
warning(
"QMGR : Received NODEPING_CONF from node %u for round %u, "
6846 "but we are %sactive on round %u. Discarding.",
6849 ((m_connectivity_check.m_active)?
"":
"in"),
6850 m_connectivity_check.m_currentRound);
// Reply belongs to the current round: the sender must have been pinged and
// be either awaited or marked as failed during the round.
6857 ndbrequire(m_connectivity_check.m_nodesPinged.
get(sendersNodeId));
6858 ndbrequire(m_connectivity_check.m_nodesWaiting.
get(sendersNodeId) ||
6859 m_connectivity_check.m_nodesFailedDuring.
get(sendersNodeId));
6861 m_connectivity_check.m_nodesWaiting.
clear(sendersNodeId);
// A reply before the suspect threshold tick clears any suspicion against
// the sender.
6863 if (likely(m_connectivity_check.m_tick < CC_SuspectTicks))
6867 m_connectivity_check.m_nodesSuspect.
clear(sendersNodeId);
// Last awaited reply: finish the round.
6870 if (m_connectivity_check.m_nodesWaiting.
isclear())
6874 connectivityCheckCompleted(signal);
// Ends the current connectivity check round: logs and reports the result,
// then either starts a new round (suspects remain) or resumes heartbeats.
6879 Qmgr::connectivityCheckCompleted(
Signal* signal)
6883 m_connectivity_check.m_active =
false;
// Surviving suspects = suspect nodes that are neither still awaited nor
// failed during the round.
6900 NdbNodeBitmask survivingSuspects(m_connectivity_check.m_nodesSuspect);
6901 survivingSuspects.bitANDC(m_connectivity_check.m_nodesWaiting);
6904 survivingSuspects.bitANDC(m_connectivity_check.m_nodesFailedDuring);
// Text forms of the node sets for the log line below.
6906 m_connectivity_check.m_nodesPinged.
getText(pinged);
6907 survivingSuspects.getText(late);
6908 m_connectivity_check.m_nodesWaiting.
getText(silent);
6909 m_connectivity_check.m_nodesFailedDuring.
getText(failed);
// "responded on time" is computed as pinged count minus suspect count.
6911 g_eventLogger->
info(
"QMGR : Connectivity check completed, "
6912 "%u other nodes checked (%s), "
6913 "%u responded on time, "
6914 "%u responded late (%s), "
6915 "%u no response will be failed (%s), "
6916 "%u failed during check (%s)\n",
6917 m_connectivity_check.m_nodesPinged.
count(),
6919 m_connectivity_check.m_nodesPinged.
count() -
6920 m_connectivity_check.m_nodesSuspect.
count(),
6921 survivingSuspects.count(),
6923 m_connectivity_check.m_nodesWaiting.
count(),
6925 m_connectivity_check.m_nodesFailedDuring.
count(),
// Event report: [1] pinged count, [2] surviving suspects, [3] silent plus
// failed-during count (theData[0] is set on a line not visible here).
6930 signal->theData[1] = m_connectivity_check.m_nodesPinged.
count();
6931 signal->theData[2] = survivingSuspects.count();
6932 signal->theData[3] = m_connectivity_check.m_nodesWaiting.
count() +
6933 m_connectivity_check.m_nodesFailedDuring.
count();
6935 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
// Suspects remain: run another round (reason 0, causing node 0).
6937 if (survivingSuspects.count() > 0)
6941 g_eventLogger->
info(
"QMGR : Starting new connectivity check due to suspect nodes.");
6943 startConnectivityCheck(signal, 0, 0);
// No suspects remain: log success, send a heartbeat right away and restart
// the heartbeat send timer.
6950 g_eventLogger->
info(
"QMGR : All other nodes (%u) connectivity ok.",
6951 m_connectivity_check.m_nodesPinged.
count() -
6952 (m_connectivity_check.m_nodesWaiting.
count() +
6953 m_connectivity_check.m_nodesFailedDuring.
count()));
6958 sendHeartbeat(signal);
6959 hb_send_timer.
reset(NdbTick_CurrentMillisecond());
// Advances the tick counter of the active connectivity check round and
// applies the per-tick action. Requires an enabled, active round with at
// least one node still awaited.
6964 Qmgr::checkConnectivityTimeSignal(
Signal* signal)
6979 ndbrequire(m_connectivity_check.getEnabled());
6980 ndbrequire(m_connectivity_check.m_active);
6981 ndbrequire(!m_connectivity_check.m_nodesWaiting.
isclear());
6983 m_connectivity_check.m_tick++;
6985 switch (m_connectivity_check.m_tick)
// Suspect threshold reached: every node still awaited becomes suspect.
6987 case CC_SuspectTicks:
6993 m_connectivity_check.m_nodesSuspect.
bitOR(m_connectivity_check.m_nodesWaiting);
// Failure threshold reached: end the round; for each node still awaited,
// send an event report and fail it with ZCONNECT_CHECK_FAILURE, sourced by
// this node. Then complete the round.
6996 case CC_FailedTicks:
7002 m_connectivity_check.m_active =
false;
7005 while ((nodeId = m_connectivity_check.m_nodesWaiting.
find(nodeId))
7006 != BitmaskImpl::NotFound)
7012 signal->theData[1] = nodeId;
7014 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
7018 failReportLab(signal, nodeId, FailRep::ZCONNECT_CHECK_FAILURE, getOwnNodeId());
7023 connectivityCheckCompleted(signal);
// Returns whether nodeId is currently in the connectivity check's suspect
// set.
7029 Qmgr::isNodeConnectivitySuspect(Uint32 nodeId)
const
7031 return m_connectivity_check.m_nodesSuspect.
get(nodeId);
// Handles a failure notification that was sent by a suspect node: instead
// of failing the reported node (aFailedNode), the reporting node
// (sourceNode) is failed, with this node as the source of the failure.
// The remaining parameters and the switch header are not visible in this
// excerpt.
7035 Qmgr::handleFailFromSuspect(
Signal* signal,
7042 const char* reasonText =
"Unknown";
// Map the failure cause to text for the log line.
7053 case FailRep::ZCONNECT_CHECK_FAILURE:
7059 reasonText =
"ZCONNECT_CHECK_FAILURE";
7061 case FailRep::ZLINK_FAILURE:
7067 reasonText =
"ZLINK_FAILURE";
7073 g_eventLogger->
warning(
"QMGR : Received Connectivity failure notification about "
7074 "%u from suspect node %u with reason %s. "
7075 "Mapping to failure of %u sourced by me.",
7076 aFailedNode, sourceNode, reasonText, sourceNode);
// Event report: [1] reason, [2] reported node, [3] reporting node
// (theData[0] is set on a line not visible here).
7079 signal->theData[1] = reason;
7080 signal->theData[2] = aFailedNode;
7081 signal->theData[3] = sourceNode;
7083 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
// Fail the reporting node with the original cause.
7085 failReportLab(signal, sourceNode, (FailRep::FailCause) reason, getOwnNodeId());