18 #include <NdbRestarter.hpp>
22 #include <mgmapi_debug.h>
23 #include <NDBT_Output.hpp>
25 #include <kernel/ndb_limits.h>
26 #include <ndb_version.h>
27 #include <NodeBitmask.hpp>
30 ndbout << "latest_error="<<ndb_mgm_get_latest_error(h) \
31 << ", line="<<ndb_mgm_get_latest_error_line(h) \
32 << ", mesg="<<ndb_mgm_get_latest_error_msg(h) \
33 << ", desc="<<ndb_mgm_get_latest_error_desc(h) \
// Constructor taking an address string `_addr`.
// NOTE(review): presumably stored in `addr`, which connect() logs as the
// mgmsrv address -- confirm. The initializer list and body are not present in
// this excerpt.
37 NdbRestarter::NdbRestarter(
const char* _addr):
// Destructor. Its body is not present in this excerpt.
50 NdbRestarter::~NdbRestarter(){
// Returns the node_id of the data node stored at position `_i` in ndbNodes.
//
// The loop walks every index of ndbNodes and returns as soon as the index
// equals `_i` cast to unsigned, so a negative `_i` will not match any
// realistic index.
// NOTE(review): the value returned when no index matches, and any checks made
// before the loop, are on lines not present in this excerpt.
55 int NdbRestarter::getDbNodeId(
int _i){
62 for(
size_t i = 0;
i < ndbNodes.size();
i++){
63 if (
i == (
unsigned)_i){
64 return ndbNodes[
i].node_id;
// Restart request for the single node `_nodeId`.
//
// The visible lines OR together one NRRF_* bit per option: NRRF_INITIAL,
// NRRF_NOSTART, NRRF_ABORT and NRRF_FORCE are included when `inital`,
// `nostart`, `abort` and `force` respectively are true.
// NOTE(review): those four names are presumably the remaining parameters;
// their declarations and the call that receives the combined flags are not
// present in this excerpt. `inital` looks like a misspelling of `initial`,
// but it is an identifier and is left untouched.
72 NdbRestarter::restartOneDbNode(
int _nodeId,
79 (inital ? NRRF_INITIAL : 0) |
80 (nostart ? NRRF_NOSTART : 0) |
81 (abort ? NRRF_ABORT : 0) |
82 (force ? NRRF_FORCE : 0));
95 (flags & NRRF_INITIAL),
96 (flags & NRRF_NOSTART),
108 if (getStatus() != 0)
111 g_info <<
"ndb_mgm_restart4 returned with error, checking node state"
114 for (
int j = 0; j<cnt; j++)
116 int _nodeId = nodes[j];
117 for(
size_t i = 0;
i < ndbNodes.size();
i++)
119 if(ndbNodes[
i].node_id == _nodeId)
121 g_info <<_nodeId<<
": status="<<ndbNodes[
i].node_status<<endl;
123 switch(ndbNodes[
i].node_status){
129 g_err <<
"Could not stop node with id = "<< _nodeId << endl;
// Selects the data node with the smallest dynamic_id in ndbNodes.
//
// getStatus() is called first; the action taken when it returns non-zero is
// not present in this excerpt.
// While scanning, `min == 0` is treated as "no candidate yet", so the first
// node always becomes the candidate; afterwards a node replaces it only if
// its dynamic_id is strictly smaller. `node` tracks the candidate's node_id.
// NOTE(review): presumably `node` is the return value -- the return statement
// and the initial values of `min`/`node` are not visible here.
141 NdbRestarter::getMasterNodeId(){
145 if (getStatus() != 0)
150 for(
size_t i = 0;
i < ndbNodes.size();
i++){
151 if(min == 0 || ndbNodes[
i].dynamic_id < min){
152 min = ndbNodes[
i].dynamic_id;
153 node = ndbNodes[
i].node_id;
// Returns the node_group of the data node whose node_id equals `nodeId`.
//
// getStatus() is called first; what happens when it returns non-zero, and the
// value returned when no node matches, are not present in this excerpt.
161 NdbRestarter::getNodeGroup(
int nodeId){
165 if (getStatus() != 0)
168 for(
size_t i = 0;
i < ndbNodes.size();
i++)
170 if(ndbNodes[
i].node_id == nodeId)
172 return ndbNodes[
i].node_group;
// Finds the node that follows `nodeId` in dynamic_id order, falling back to
// getMasterNodeId() when the search below does not return.
//
// Visible steps:
//  1. getStatus() is called (failure handling not shown).
//  2. Search ndbNodes for the entry whose node_id is `nodeId`.
//  3. `dynid` is that entry's dynamic_id.
//  4. First pass raises `minid` to any larger dynamic_id it meets.
//  5. Second pass lowers `minid` to any dynamic_id that is strictly greater
//     than `dynid` and strictly smaller than the current `minid`.
//  6. The node_id of the entry whose dynamic_id equals `minid` is returned.
// NOTE(review): the initial value of `minid`, the loop exit in step 2, and any
// guard around step 6 are on lines not present in this excerpt.
180 NdbRestarter::getNextMasterNodeId(
int nodeId){
184 if (getStatus() != 0)
188 for(i = 0; i < ndbNodes.size(); i++)
190 if(ndbNodes[i].node_id == nodeId)
// The assert documents that nodeId is expected to be found; the following
// `if` still tests the not-found case for builds where assert is compiled out.
195 assert(i < ndbNodes.size());
196 if (i == ndbNodes.size())
199 int dynid = ndbNodes[
i].dynamic_id;
201 for (i = 0; i<ndbNodes.size(); i++)
202 if (ndbNodes[i].dynamic_id > minid)
203 minid = ndbNodes[
i].dynamic_id;
205 for (i = 0; i<ndbNodes.size(); i++)
206 if (ndbNodes[i].dynamic_id > dynid &&
207 ndbNodes[i].dynamic_id < minid)
209 minid = ndbNodes[
i].dynamic_id;
214 for (i = 0; i<ndbNodes.size(); i++)
215 if (ndbNodes[i].dynamic_id == minid)
216 return ndbNodes[
i].node_id;
219 return getMasterNodeId();
// Picks a data node that is not the current master, starting from a
// caller-supplied random number.
//
// `rand` is reduced modulo ndbNodes.size() to get a starting index, then the
// index is advanced (wrapping around) while it points at the master, for at
// most ndbNodes.size() steps. The node_id at the final index is returned if it
// differs from the master.
// NOTE(review): the modulo is undefined for an empty ndbNodes; any guard, the
// declaration of `counter`, and the value returned when every node is the
// master are on lines not present in this excerpt.
223 NdbRestarter::getRandomNotMasterNodeId(
int rand){
224 int master = getMasterNodeId();
229 rand = rand % ndbNodes.size();
230 while(counter++ < ndbNodes.size() && ndbNodes[rand].node_id == master)
231 rand = (rand + 1) % ndbNodes.size();
233 if(ndbNodes[rand].node_id != master)
234 return ndbNodes[rand].node_id;
// Picks a data node that belongs to a different node group than `nodeId`.
//
// The node group of `nodeId` is looked up first; `node_group == -1` is tested
// afterwards, which suggests -1 marks "nodeId not found" (the initial value is
// not visible -- confirm). Then, starting at index `rand % ndbNodes.size()`,
// the index is advanced with wrap-around while the node at that index is in
// the same group, for at most ndbNodes.size() steps. The node_id at the final
// index is returned if its group differs.
// NOTE(review): failure return values, the declaration of `counter`, and any
// guard against an empty ndbNodes are not present in this excerpt.
239 NdbRestarter::getRandomNodeOtherNodeGroup(
int nodeId,
int rand){
243 if (getStatus() != 0)
247 for(
size_t i = 0; i < ndbNodes.size(); i++){
248 if(ndbNodes[i].node_id == nodeId){
249 node_group = ndbNodes[
i].node_group;
253 if(node_group == -1){
258 rand = rand % ndbNodes.size();
259 while(counter++ < ndbNodes.size() && ndbNodes[rand].node_group == node_group)
260 rand = (rand + 1) % ndbNodes.size();
262 if(ndbNodes[rand].node_group != node_group)
263 return ndbNodes[rand].node_id;
// Picks a data node, other than `nodeId` itself, that belongs to the same node
// group as `nodeId`.
//
// The node group of `nodeId` is looked up first; `node_group == -1` is tested
// afterwards, which suggests -1 marks "nodeId not found" (the initial value is
// not visible -- confirm). Then, starting at index `rand % ndbNodes.size()`,
// the index is advanced with wrap-around while the node at that index is
// either `nodeId` itself or in a different group, for at most ndbNodes.size()
// steps. The node_id at the final index is returned if it is a different node
// in the same group.
// NOTE(review): failure return values, the declaration of `counter`, and any
// guard against an empty ndbNodes are not present in this excerpt.
269 NdbRestarter::getRandomNodeSameNodeGroup(
int nodeId,
int rand){
273 if (getStatus() != 0)
277 for(
size_t i = 0; i < ndbNodes.size(); i++){
278 if(ndbNodes[i].node_id == nodeId){
279 node_group = ndbNodes[
i].node_group;
283 if(node_group == -1){
288 rand = rand % ndbNodes.size();
289 while(counter++ < ndbNodes.size() &&
290 (ndbNodes[rand].node_id == nodeId ||
291 ndbNodes[rand].node_group != node_group))
292 rand = (rand + 1) % ndbNodes.size();
294 if(ndbNodes[rand].node_group == node_group &&
295 ndbNodes[rand].node_id != nodeId)
296 return ndbNodes[rand].node_id;
// Polls until getStatus() succeeds (returns 0) or the attempt budget runs out.
//
// The loop continues while isConnected() is true and getStatus() is non-zero.
// `_timeout` is decremented once per iteration and the failure message is
// printed when it reaches zero, so it counts polling attempts rather than
// wall-clock time. NdbSleep_MilliSleep(100) is called between attempts
// (presumably a 100 ms pause -- confirm).
// NOTE(review): the return values are on lines not present in this excerpt.
304 NdbRestarter::waitConnected(
unsigned int _timeout){
306 while (isConnected() && getStatus() != 0){
307 if (_timeout-- == 0){
308 ndbout <<
"NdbRestarter::waitConnected failed" << endl;
311 NdbSleep_MilliSleep(100);
// Signature only in this excerpt. Judging by the name it waits for the cluster
// to reach the started state, bounded by `_timeout`; the units of `_timeout`
// and the return values cannot be confirmed from the visible lines.
317 NdbRestarter::waitClusterStarted(
unsigned int _timeout){
// Signature only in this excerpt. Judging by the name it waits for the cluster
// to reach start phase `_startphase`, bounded by `_timeout`; the body and
// return values are not visible here.
322 NdbRestarter::waitClusterStartPhase(
int _startphase,
unsigned int _timeout){
// Signature only in this excerpt. Judging by the name it waits for the cluster
// to enter single user mode, bounded by `_timeout`; the body and return values
// are not visible here.
327 NdbRestarter::waitClusterSingleUser(
unsigned int _timeout){
// Signature only in this excerpt. restartAll() calls this with 60 and, on a
// non-zero result, logs that the cluster did not enter STATUS_NOT_STARTED
// within 60s -- so `_timeout` is presumably in seconds and 0 means success.
// The body is not visible here.
332 NdbRestarter::waitClusterNoStart(
unsigned int _timeout){
338 unsigned int _timeout,
341 int nodes[MAX_NDB_NODES];
344 if (getStatus() != 0){
345 g_err <<
"waitClusterStat: getStatus != 0" << endl;
350 for (
size_t i = 0; i < ndbNodes.size(); i++){
351 nodes[
i] = ndbNodes[
i].node_id;
355 return waitNodesState(nodes, numNodes, _status, _timeout, _startphase);
362 unsigned int _timeout,
366 g_err <<
"!isConnected"<<endl;
370 unsigned int attempts = 0;
371 unsigned int resetAttempts = 0;
372 const unsigned int MAX_RESET_ATTEMPTS = 10;
373 bool allInState =
false;
374 while (allInState ==
false){
375 if (_timeout > 0 && attempts > _timeout){
380 bool waitMore =
false;
391 for (
size_t n = 0;
n < ndbNodes.size();
n++){
399 if (!waitMore || resetAttempts > MAX_RESET_ATTEMPTS){
400 g_err <<
"waitNodesState("
402 <<
", "<<_startphase<<
")"
403 <<
" timeout after " << attempts <<
" attemps" << endl;
407 g_err <<
"waitNodesState("
409 <<
", "<<_startphase<<
")"
410 <<
" resetting number of attempts "
411 << resetAttempts << endl;
418 if (getStatus() != 0){
419 g_err <<
"waitNodesState: getStatus != 0" << endl;
427 for (
int i = 0; i < _num_nodes; i++){
429 for (
size_t n = 0;
n < ndbNodes.size();
n++){
430 if (ndbNodes[
n].node_id == _nodes[i])
431 ndbNode = &ndbNodes[
n];
439 g_info <<
"State node " << ndbNode->
node_id <<
" "
445 assert(ndbNode != NULL);
454 g_info <<
"status = "
462 <<
") != _status("<<_status<<
")"<<endl;
470 g_info <<
"Waiting for cluster enter state"
472 NdbSleep_SecSleep(1);
// Signature only in this excerpt. Takes an array `_nodes` of `_num_nodes` node
// ids and a `_timeout`; judging by the name it waits for those nodes to be
// started. The body and return values are not visible here.
478 int NdbRestarter::waitNodesStarted(
const int * _nodes,
int _num_nodes,
479 unsigned int _timeout){
// Signature only in this excerpt. Takes an array `_nodes` of `_num_nodes` node
// ids, a target `_startphase` and a `_timeout`; judging by the name it waits
// for those nodes to reach that start phase. The body and return values are
// not visible here.
484 int NdbRestarter::waitNodesStartPhase(
const int * _nodes,
int _num_nodes,
485 int _startphase,
unsigned int _timeout){
// Signature only in this excerpt. Takes an array `_nodes` of `_num_nodes` node
// ids and a `_timeout`; judging by the name it waits for those nodes to reach
// the not-started state. rollingRestart() treats a non-zero result as failure
// of its wait step (the exact handling is not visible). The body is not
// visible here.
491 int NdbRestarter::waitNodesNoStart(
const int * _nodes,
int _num_nodes,
492 unsigned int _timeout){
// Reports whether a connection is available, connecting on demand.
//
// When the `connected` flag is already true the early branch is taken (its
// return statement is not present in this excerpt). Otherwise connect() is
// attempted and the result is true exactly when connect() returns 0.
498 NdbRestarter::isConnected(){
499 if (connected ==
true)
501 return connect() == 0;
// Connects to the management server at `addr`.
//
// Visible behaviour is logging only: an error when the handle is NULL, an info
// line naming the mgmsrv address before the attempt, and the same
// "Connection to <addr> failed" error from two separate failure paths.
// NOTE(review): the calls that create the handle and perform the connection,
// the conditions guarding each message, and the return values are on lines not
// present in this excerpt.
505 NdbRestarter::connect(){
509 g_err <<
"handle == NULL" << endl;
512 g_info <<
"Connecting to mgmsrv at " << addr.
c_str() << endl;
516 g_err <<
"Connection to " << addr.
c_str() <<
" failed" << endl;
523 g_err <<
"Connection to " << addr.
c_str() <<
" failed" << endl;
// Signature only in this excerpt; the body is not visible here.
532 NdbRestarter::disconnect(){
// Refreshes the cached node lists (ndbNodes, mgmNodes, apiNodes) from the
// management server.
//
// Visible behaviour:
//  - A "Reconnected..." message is logged on some path (condition not shown).
//  - When `err` is NDB_MGM_COULD_NOT_CONNECT_TO_SOCKET the code logs, calls
//    NdbSleep_SecSleep(1) and, per the message, retries.
//  - A NULL status is reported together with `retries` and `err`.
//  - Each `*node` is copied into exactly one of ndbNodes / mgmNodes / apiNodes;
//    the selector (presumably the node type) is not visible.
//  - "getStatus failed" is logged on the final failure path.
// NOTE(review): the status call itself, the retry loop, the clearing of the
// vectors and all return values are on lines not present in this excerpt.
541 NdbRestarter::getStatus(){
558 g_err <<
"Reconnected..." << endl;
562 if (err == NDB_MGM_COULD_NOT_CONNECT_TO_SOCKET){
563 g_err <<
"Could not connect to socket, sleep and retry" << endl;
565 NdbSleep_SecSleep(1);
570 ndbout <<
"status==NULL, retries="<<retries<<
" err=" << err << endl;
579 ndbNodes.push_back(*node);
582 mgmNodes.push_back(*node);
585 apiNodes.push_back(*node);
// NOTE(review): "kalle" looks like a leftover debug marker; the branch that
// prints it is not visible in this excerpt.
598 ndbout <<
"kalle"<< endl;
606 ndbout <<
"status == 0" << endl;
613 g_err <<
"getStatus failed" << endl;
// Returns the number of entries in ndbNodes after calling getStatus().
// The size_t from size() is narrowed to the int return type.
// NOTE(review): the result when getStatus() is non-zero is on a line not
// present in this excerpt.
618 int NdbRestarter::getNumDbNodes(){
622 if (getStatus() != 0)
625 return ndbNodes.size();
// Restarts every node in the cluster in two steps: stop, then start.
//
// Visible sequence:
//  1. A restart call taking `force` and `&unused` is made; a -1 result logs
//     "Could not restart(stop) all nodes".
//  2. waitClusterNoStart(60) must return 0, otherwise the 60s message is logged.
//  3. A debug line notes the `nostart == true` case (presumably the function
//     then stops without starting the nodes -- confirm).
//  4. A later failure logs "Could not restart(start) all nodes".
// NOTE(review): the parameters after `initial`, the calls issued in steps 1
// and 4, and all return values are on lines not present in this excerpt.
628 int NdbRestarter::restartAll(
bool initial,
638 force, &unused) == -1) {
640 g_err <<
"Could not restart(stop) all nodes " << endl;
644 if (waitClusterNoStart(60) != 0){
645 g_err <<
"Cluster didnt enter STATUS_NOT_STARTED within 60s" << endl;
650 g_debug <<
"restartAll: nostart == true" << endl;
656 g_err <<
"Could not restart(start) all nodes " << endl;
// Starts all nodes; logs "Could not start all nodes" on failure.
// NOTE(review): the start call, its failure condition and the return values
// are on lines not present in this excerpt.
663 int NdbRestarter::startAll(){
669 g_err <<
"Could not start all nodes " << endl;
// Starts the `num_nodes` nodes whose ids are listed in `nodes`.
// The failure message is the same "Could not start all nodes" text that
// startAll() logs.
// NOTE(review): the start call, its failure condition and the return values
// are on lines not present in this excerpt.
677 int NdbRestarter::startNodes(
const int * nodes,
int num_nodes){
683 g_err <<
"Could not start all nodes " << endl;
// Inserts error code `_error` into node `_nodeId` via ndb_mgm_insert_error().
//
// A -1 result from that call is treated as failure: the node id is logged,
// followed by the text in `reply.message`.
// NOTE(review): the declaration of `reply`, any connection check, and the
// return values are on lines not present in this excerpt.
690 int NdbRestarter::insertErrorInNode(
int _nodeId,
int _error){
697 if (ndb_mgm_insert_error(handle, _nodeId, _error, &reply) == -1){
699 g_err <<
"Could not insert error in node with id = "<< _nodeId << endl;
702 g_err <<
"Error: " << reply.
message << endl;
// Inserts error code `_error` into every data node in ndbNodes by calling
// insertErrorInNode() once per node, logging each node id at debug level.
//
// getStatus() is called first to refresh ndbNodes.
// NOTE(review): what happens when getStatus() fails or when a per-node insert
// returns -1 (abort vs. continue), and the final return value, are on lines
// not present in this excerpt.
707 int NdbRestarter::insertErrorInAllNodes(
int _error){
711 if (getStatus() != 0)
716 for(
size_t i = 0; i < ndbNodes.size(); i++){
717 g_debug <<
"inserting error in node " << ndbNodes[
i].node_id << endl;
718 if (insertErrorInNode(ndbNodes[i].
node_id, _error) == -1)
// Sends a dump-state request with the `_num_args` integers in `_args` to node
// `_nodeId`.
//
// On failure the node id is logged, followed by the text in `reply.message`.
// NOTE(review): the dump call itself, the declaration of `reply`, and the
// return values are on lines not present in this excerpt.
727 int NdbRestarter::dumpStateOneNode(
int _nodeId,
const int * _args,
int _num_args){
736 g_err <<
"Could not dump state in node with id = "<< _nodeId << endl;
740 g_err <<
"Error: " << reply.
message << endl;
// Sends the same dump-state request (`_args`, `_num_args`) to every data node
// in ndbNodes by calling dumpStateOneNode() once per node, logging each node
// id at debug level.
//
// getStatus() is called first to refresh ndbNodes.
// NOTE(review): what happens when getStatus() fails or when a per-node dump
// returns -1 (abort vs. continue), and the final return value, are on lines
// not present in this excerpt.
745 int NdbRestarter::dumpStateAllNodes(
const int * _args,
int _num_args){
749 if (getStatus() != 0)
754 for(
size_t i = 0; i < ndbNodes.size(); i++){
755 g_debug <<
"dumping state in node " << ndbNodes[
i].node_id << endl;
756 if (dumpStateOneNode(ndbNodes[i].
node_id, _args, _num_args) == -1)
// Requests single user mode for `_nodeId`; the log text calls it an api node.
//
// On failure the node id is logged, followed by the text in `reply.message`.
// NOTE(review): the management call, the declaration of `reply`, and the
// return values are on lines not present in this excerpt.
764 int NdbRestarter::enterSingleUserMode(
int _nodeId){
773 g_err <<
"Could not enter single user mode api node = "<< _nodeId << endl;
777 g_err <<
"Error: " << reply.
message << endl;
// Requests that the cluster leave single user mode.
//
// On failure an error is logged, followed by the text in `reply.message`.
// NOTE(review): the management call, the declaration of `reply`, and the
// return values are on lines not present in this excerpt.
784 int NdbRestarter::exitSingleUserMode(){
793 g_err <<
"Could not exit single user mode " << endl;
797 g_err <<
"Error: " << reply.
message << endl;
// Returns the configuration held in `m_config`.
// When `m_config` is already set it is returned directly, without running the
// rest of the function.
// NOTE(review): how the configuration is obtained when `m_config` is unset is
// on lines not present in this excerpt.
803 NdbRestarter::getConfig(){
804 if(m_config)
return m_config;
// Returns a node id chosen according to the selector `type`.
//
// Three strategies are visible: a uniformly indexed data node
// (getDbNodeId(rand() % getNumDbNodes())), the master (getMasterNodeId()), and
// a random non-master node (getRandomNotMasterNodeId(rand())).
// NOTE(review): the case labels that map `type` to each strategy are on lines
// not present in this excerpt. `rand() % getNumDbNodes()` is undefined when
// getNumDbNodes() returns 0 -- confirm whether callers guarantee otherwise.
813 NdbRestarter::getNode(NodeSelector
type)
817 return getDbNodeId(rand() % getNumDbNodes());
819 return getMasterNodeId();
821 return getRandomNotMasterNodeId(rand());
// Signature only in this excerpt. Takes a boolean `val`; judging by the name
// it turns reconnect behaviour on or off, but the body is not visible here.
830 NdbRestarter::setReconnect(
bool val){
// Checks the state of the cluster given a list of nodes expected to be dead.
//
// Visible steps: getStatus() refreshes ndbNodes; every id in `deadnodes`
// (`num_nodes` entries) is set in the bitmask `mask`; ndbNodes is then scanned
// and nodes whose id is set in `mask` are handled separately from the rest. A
// node_id is returned from inside the scan.
// NOTE(review): the action taken for masked nodes, the condition under which a
// node_id is returned, and the value returned when the scan completes are on
// lines not present in this excerpt.
835 NdbRestarter::checkClusterAlive(
const int * deadnodes,
int num_nodes)
837 if (getStatus() != 0)
841 for (
int i = 0; i<num_nodes; i++)
842 mask.
set(deadnodes[i]);
844 for (
size_t n = 0;
n < ndbNodes.size();
n++)
846 if (mask.
get(ndbNodes[
n].node_id))
850 return ndbNodes[
n].node_id;
// Restarts the data nodes in batches, at most one node per node group per
// batch.
//
// Visible steps for a batch:
//  1. Scan ndbNodes; the first node seen from each node group (tracked in
//     `ng_mask`) is appended to `nodes` and marked in `restart_nodes`.
//  2. Restart that batch, passing the NRRF_INITIAL and NRRF_NOSTART bits of
//     `flags` as booleans.
//  3. waitNodesNoStart(), startNodes() and waitClusterStarted() are called in
//     turn; a non-zero result from any of them takes a branch whose body is
//     not shown.
//  4. A later scan appends nodes not yet marked in `restart_nodes` to `nodes`
//     and marks them.
// NOTE(review): the restart call in step 2, the loop that repeats the batches,
// the clearing of `nodes`/`ng_mask` between batches, any node-group filter in
// step 4, and all return values are on lines not present in this excerpt.
857 NdbRestarter::rollingRestart(Uint32
flags)
859 if (getStatus() != 0)
865 for(
size_t i = 0; i < ndbNodes.size(); i++)
867 if (ng_mask.
get(ndbNodes[i].node_group) ==
false)
869 ng_mask.
set(ndbNodes[i].node_group);
870 nodes.push_back(ndbNodes[i].
node_id);
871 restart_nodes.
set(ndbNodes[i].node_id);
877 (flags & NRRF_INITIAL) != 0,
878 (flags & NRRF_NOSTART) != 0,
// NOTE(review): `|| true` makes this argument true regardless of NRRF_ABORT,
// so the caller's abort flag has no effect here -- confirm this is intended.
879 (flags & NRRF_ABORT) != 0 ||
true) <= 0)
884 if (waitNodesNoStart(nodes.getBase(), nodes.size()))
887 if (startNodes(nodes.getBase(), nodes.size()))
890 if (waitClusterStarted())
894 for (Uint32 i = 0; i<ndbNodes.size(); i++)
896 if (restart_nodes.
get(ndbNodes[i].node_id) ==
false)
898 nodes.push_back(ndbNodes[i].
node_id);
899 restart_nodes.
set(ndbNodes[i].node_id);
// Writes the version of the current master node into the out-parameter
// `version`.
//
// The master id comes from getMasterNodeId(); -1 is treated as "no master".
// Otherwise ndbNodes is searched for that id and the entry's `version` field
// is copied out. If no entry matches, an error naming the master id is logged.
// NOTE(review): the return values for the success and failure paths are on
// lines not present in this excerpt.
909 NdbRestarter::getMasterNodeVersion(
int&
version)
911 int masterNodeId = getMasterNodeId();
912 if (masterNodeId != -1)
914 for(
size_t i = 0; i < ndbNodes.size(); i++)
916 if (ndbNodes[i].
node_id == masterNodeId)
918 version = ndbNodes[
i].version;
924 g_err <<
"Could not find node info for master node id "
925 << masterNodeId << endl;
937 if (getStatus() != 0)
954 g_err <<
"Bad node type : " << type << endl;
958 if (nodeVec->size() == 0)
960 g_err <<
"No nodes of type " << type <<
" online" << endl;
967 for(
size_t i = 0; i < nodeVec->size(); i++)
969 int nodeVer = (*nodeVec)[
i].version;
974 if (nodeVer > maxVer)
// Returns the node_status of a data node found while scanning ndbNodes.
//
// getStatus() is called first to refresh ndbNodes.
// NOTE(review): the test that matches an entry against `nodeid` (presumably a
// node_id comparison), the result when getStatus() fails, and the result when
// no entry matches are on lines not present in this excerpt.
982 NdbRestarter::getNodeStatus(
int nodeid)
984 if (getStatus() != 0)
987 for (
size_t n = 0;
n < ndbNodes.size();
n++)
990 return ndbNodes[
n].node_status;