18 #include <signaldata/DumpStateOrd.hpp>
19 #include <NdbBackup.hpp>
21 #include <NDBT_Output.hpp>
22 #include <NdbConfig.h>
23 #include <ndb_version.h>
/**
 * CHECK(b, m): evaluate the condition `b` exactly once. If it is zero/false,
 * print "ERR: <m>" followed by the file name, line number and the evaluated
 * value to ndbout, then return NDBT_FAILED from the enclosing function.
 *
 * `m` is spliced directly into the ndbout `<<` chain, so it may itself contain
 * `<<` operators (e.g. CHECK(rc == 0, "Could not restart node " << nodeId)).
 * For that reason `m` is intentionally NOT parenthesised.
 *
 * The body is wrapped in do { ... } while (0) so the expansion is a single
 * statement: `if (c) CHECK(x, "msg"); else ...` now parses as intended instead
 * of failing on the stray `;` after a bare `{ ... }` block. Call sites must
 * terminate the macro invocation with `;`.
 */
#define CHECK(b, m) do { const int _xx = (b); if (!(_xx)) { \
  ndbout << "ERR: "<< m \
         << " " << "File: " << __FILE__ \
         << " (Line: " << __LINE__ << ")" << "- " << _xx << endl; \
  return NDBT_FAILED; } } while (0)
35 #include <ConfigRetriever.hpp>
37 #include <mgmapi_config_parameters.h>
38 #include <mgmapi_configuration.hpp>
41 NdbBackup::clearOldBackups()
52 for(
size_t i = 0;
i < ndbNodes.size();
i++)
54 int nodeId = ndbNodes[
i].node_id;
55 const char* path = getBackupDataDirForNode(nodeId);
67 tmp.
assfmt(
"ssh %s rm -rf %s/BACKUP", host, path);
69 ndbout <<
"buf: "<< tmp.
c_str() <<endl;
70 int res = system(tmp.
c_str());
71 ndbout <<
"ssh res: " << res << endl;
73 if (res && retCode == 0)
82 NdbBackup::start(
unsigned int & _backup_id,
84 unsigned int user_backup_id,
85 unsigned int logtype){
94 bool any = _backup_id == 0;
108 NdbSleep_SecSleep(3);
110 user_backup_id += 100;
121 g_err <<
"PLEASE CHECK CODE NdbBackup.cpp line=" << __LINE__ << endl;
131 NdbBackup::startLogEvent(){
139 g_err <<
"Can't create log event" << endl;
146 NdbBackup::checkBackupStatus(){
156 switch (log_event.type) {
170 ndb_mgm_destroy_logevent_handle(&log_handle);
176 NdbBackup::getBackupDataDirForNode(
int _node_id){
189 s =
"No error given!";
191 ndbout <<
"Could not fetch configuration" << endl;
200 if (iter.find(CFG_NODE_ID, _node_id)){
201 ndbout <<
"Invalid configuration fetched, DB missing" << endl;
205 unsigned int type = NODE_TYPE_DB + 1;
206 if(iter.get(CFG_TYPE_OF_SECTION, &type) || type != NODE_TYPE_DB){
207 ndbout <<
"type = " << type << endl;
208 ndbout <<
"Invalid configuration fetched, I'm wrong type of node" << endl;
213 if (iter.get(CFG_DB_BACKUP_DATADIR, &path)){
214 ndbout <<
"BackupDataDir not found" << endl;
223 NdbBackup::execRestore(
bool _restore_data,
226 unsigned _backup_id){
227 ndbout <<
"getBackupDataDir "<< _node_id <<endl;
229 const char* path = getBackupDataDirForNode(_node_id);
233 ndbout <<
"getHostName "<< _node_id <<endl;
243 tmp.
assfmt(
"scp %s:%s/BACKUP/BACKUP-%d/BACKUP-%d*.%d.* .",
249 ndbout <<
"buf: "<< tmp.
c_str() <<endl;
250 int res = system(tmp.
c_str());
252 ndbout <<
"scp res: " << res << endl;
254 if (res == 0 && _restore_meta)
258 tmp.
assfmt(
"%sndb_restore -c \"%s:%d\" -n %d -b %d -m -d .",
262 "valgrind --leak-check=yes -v "
269 ndbout <<
"buf: "<< tmp.
c_str() <<endl;
270 res = system(tmp.
c_str());
273 if (res == 0 && _restore_data)
276 tmp.
assfmt(
"%sndb_restore -c \"%s:%d\" -n %d -b %d -r .",
280 "valgrind --leak-check=yes -v "
287 ndbout <<
"buf: "<< tmp.
c_str() <<endl;
288 res = system(tmp.
c_str());
291 ndbout <<
"ndb_restore res: " << res << endl;
297 NdbBackup::restore(
unsigned _backup_id){
302 if (getStatus() != 0)
308 res = execRestore(
true,
true, ndbNodes[0].node_id, _backup_id);
311 for(
size_t i = 1;
i < ndbNodes.size();
i++){
312 res = execRestore(
true,
false, ndbNodes[
i].node_id, _backup_id);
320 NFDuringBackupM_codes[] = {
333 NFDuringBackupS_codes[] = {
344 NFDuringBackupSL_codes[] = {
352 const int sz =
sizeof(NFDuringBackupM_codes)/
sizeof(NFDuringBackupM_codes[0]);
353 return NF(_restarter, NFDuringBackupM_codes, sz,
true);
358 const int sz =
sizeof(NFDuringBackupS_codes)/
sizeof(NFDuringBackupS_codes[0]);
359 return NF(_restarter, NFDuringBackupS_codes, sz,
true);
364 const int sz =
sizeof(NFDuringBackupS_codes)/
sizeof(NFDuringBackupS_codes[0]);
365 return NF(_restarter, NFDuringBackupS_codes, sz,
false);
369 NdbBackup::NF(
NdbRestarter& _restarter,
int *NFDuringBackup_codes,
const int sz,
bool onMaster){
370 int nNodes = _restarter.getNumDbNodes();
375 int nodeId = _restarter.getMasterNodeId();
377 CHECK(_restarter.restartOneDbNode(nodeId,
false,
true,
true) == 0,
378 "Could not restart node "<< nodeId);
380 CHECK(_restarter.waitNodesNoStart(&nodeId, 1) == 0,
381 "waitNodesNoStart failed");
383 CHECK(_restarter.startNodes(&nodeId, 1) == 0,
384 "failed to start node");
387 CHECK(_restarter.waitClusterStarted() == 0,
388 "waitClusterStarted failed");
390 myRandom48Init((
long)NdbTick_CurrentMillisecond());
392 for(
int i = 0;
i<sz;
i++){
394 int error = NFDuringBackup_codes[
i];
395 unsigned int backupId;
397 const int masterNodeId = _restarter.getMasterNodeId();
398 CHECK(masterNodeId > 0,
"getMasterNodeId failed");
401 nodeId = masterNodeId;
404 while (nodeId == masterNodeId) {
405 randomId = myRandom48(nNodes);
406 nodeId = _restarter.getDbNodeId(randomId);
410 g_err <<
"NdbBackup::NF node = " << nodeId
411 <<
" error code = " << error <<
" masterNodeId = "
412 << masterNodeId << endl;
415 int val[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
416 CHECK(_restarter.dumpStateOneNode(nodeId, val, 2) == 0,
417 "failed to set RestartOnErrorInsert");
418 CHECK(_restarter.insertErrorInNode(nodeId, error) == 0,
419 "failed to set error insert");
421 g_info <<
"error inserted" << endl;
422 NdbSleep_SecSleep(1);
424 g_info <<
"starting backup" << endl;
425 int r = start(backupId);
426 g_info <<
"r = " << r
427 <<
" (which should fail) started with id = " << backupId << endl;
429 g_err <<
"Backup should have failed on error_insertion " << error << endl
430 <<
"Master = " << masterNodeId <<
"Node = " << nodeId << endl;
434 CHECK(_restarter.waitNodesNoStart(&nodeId, 1) == 0,
435 "waitNodesNoStart failed");
437 g_info <<
"number of nodes running " << _restarter.getNumDbNodes() << endl;
439 if (_restarter.getNumDbNodes() != nNodes) {
440 g_err <<
"Failure: cluster not up" << endl;
444 g_info <<
"starting new backup" << endl;
445 CHECK(start(backupId) == 0,
446 "failed to start backup");
447 g_info <<
"(which should succeed) started with id = " << backupId << endl;
449 g_info <<
"starting node" << endl;
450 CHECK(_restarter.startNodes(&nodeId, 1) == 0,
451 "failed to start node");
453 CHECK(_restarter.waitClusterStarted() == 0,
454 "waitClusterStarted failed");
455 g_info <<
"node started" << endl;
457 int val2[] = { 24, 2424 };
458 CHECK(_restarter.dumpStateAllNodes(val2, 2) == 0,
459 "failed to check backup resources RestartOnErrorInsert");
461 CHECK(_restarter.insertErrorInNode(nodeId, 10099) == 0,
462 "failed to set error insert");
464 NdbSleep_SecSleep(1);
496 const int sz =
sizeof(FailM_codes)/
sizeof(FailM_codes[0]);
497 return Fail(_restarter, FailM_codes, sz,
true);
502 const int sz =
sizeof(FailS_codes)/
sizeof(FailS_codes[0]);
503 return Fail(_restarter, FailS_codes, sz,
true);
508 const int sz =
sizeof(FailS_codes)/
sizeof(FailS_codes[0]);
509 return Fail(_restarter, FailS_codes, sz,
false);
513 NdbBackup::Fail(
NdbRestarter& _restarter,
int *Fail_codes,
const int sz,
bool onMaster){
515 CHECK(_restarter.waitClusterStarted() == 0,
516 "waitClusterStarted failed");
518 int nNodes = _restarter.getNumDbNodes();
520 myRandom48Init((
long)NdbTick_CurrentMillisecond());
522 for(
int i = 0;
i<sz;
i++){
523 int error = Fail_codes[
i];
524 unsigned int backupId;
526 const int masterNodeId = _restarter.getMasterNodeId();
527 CHECK(masterNodeId > 0,
"getMasterNodeId failed");
530 nodeId = masterNodeId;
533 while (nodeId == masterNodeId) {
534 randomId = myRandom48(nNodes);
535 nodeId = _restarter.getDbNodeId(randomId);
539 g_err <<
"NdbBackup::Fail node = " << nodeId
540 <<
" error code = " << error <<
" masterNodeId = "
541 << masterNodeId << endl;
543 CHECK(_restarter.insertErrorInNode(nodeId, error) == 0,
544 "failed to set error insert");
546 g_info <<
"error inserted" << endl;
547 g_info <<
"waiting some before starting backup" << endl;
549 g_info <<
"starting backup" << endl;
550 int r = start(backupId);
551 g_info <<
"r = " << r
552 <<
" (which should fail) started with id = " << backupId << endl;
554 g_err <<
"Backup should have failed on error_insertion " << error << endl
555 <<
"Master = " << masterNodeId <<
"Node = " << nodeId << endl;
559 CHECK(_restarter.waitClusterStarted() == 0,
560 "waitClusterStarted failed");
562 CHECK(_restarter.insertErrorInNode(nodeId, 10099) == 0,
563 "failed to set error insert");
565 NdbSleep_SecSleep(5);
567 int val2[] = { 24, 2424 };
568 CHECK(_restarter.dumpStateAllNodes(val2, 2) == 0,
569 "failed to check backup resources RestartOnErrorInsert");