Skip to content

Commit

Permalink
Changed to keep map of DNS addresses due to 5 second DNS delays.
Browse files Browse the repository at this point in the history
  • Loading branch information
jgates108 committed Dec 12, 2019
1 parent d2eb8d9 commit 91805e2
Show file tree
Hide file tree
Showing 18 changed files with 114 additions and 129 deletions.
5 changes: 3 additions & 2 deletions admin/templates/configuration/etc/log4cxx.index.properties
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@
# export LSST_LOG_CONFIG=$HOME/.lsst/log4cxx.unittest.properties
#

#log4j.rootLogger=INFO, CONSOLE
log4j.rootLogger=DEBUG, CONSOLE
log4j.rootLogger=INFO, CONSOLE
#log4j.rootLogger=DEBUG, CONSOLE
#log4j.rootLogger=WARN, CONSOLE

log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout
Expand Down
2 changes: 0 additions & 2 deletions admin/tools/docker/index/container/dev/README
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@
# These are the optional building steps (in case if this container needs to
# be rebuilt from scratch (which is going to take ~2 hours or longer)

#docker build -t qserv/replica:dev . &&&
#docker push qserv/replica:dev &&&
docker build -t qserv/loaderbase:dev .
docker push qserv/loaderbase:dev

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ source /qserv/stack/loadLSST.bash
cd /home/qserv/dev/qserv
setup -r . -t qserv-dev

export LSST_LOG_CONFIG=/home/qserv/dev/qserv/admin/templates/configuration/etc/log4cxx.index.properties
export LSST_LOG_CONFIG=/home/qserv/dev/qserv/admin/templates/configuration/etc/log4cxx.index_master.properties

/home/qserv/dev/qserv/build/loader/appMaster /home/qserv/dev/qserv/core/modules/loader/config/master.cnf

Expand Down
17 changes: 2 additions & 15 deletions core/modules/loader/BufferUdp.cc
Original file line number Diff line number Diff line change
Expand Up @@ -134,20 +134,7 @@ std::shared_ptr<MsgElement> BufferUdp::_safeRetrieve(std::string const& note) {

bool BufferUdp::isRetrieveSafe(size_t len) const {
auto newLen = (_rCursor + len);
// &&&return (newLen <= _end && newLen <= _wCursor);
bool res = (newLen <= _end && newLen <= _wCursor); // &&&
if (!res) { // &&&
LOGS(_log, LOG_LVL_WARN, "&&& BufferUdp::isRetrieveSafe not safe len=" << len <<
" rCursor=" << (void*)_rCursor <<
" newLen=" << (void*)newLen <<
" wCursor=" << (void*)_wCursor <<
" _end=" << (void*)_end <<
" (newLen<=end)=" << (newLen <= _end) <<
" (newLen<=_wCursor)=" << (newLen <= _wCursor) <<
" res=" << res);
LOGS(_log, LOG_LVL_WARN, "&&& BufferUdp::isRetrieveSafe " << dumpStr(false));
}
return res;
return (newLen <= _end && newLen <= _wCursor);
}


Expand All @@ -157,7 +144,7 @@ bool BufferUdp::retrieve(void* out, size_t len) {
_rCursor += len;
return true;
}
LOGS(_log, LOG_LVL_WARN, "&&& BufferUdp::retrieve not safe len=" << len);
LOGS(_log, LOG_LVL_DEBUG, "BufferUdp::retrieve not safe len=" << len);
return false;
}

Expand Down
6 changes: 3 additions & 3 deletions core/modules/loader/CentralClient.cc
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ void CentralClient::_handleKeyLookup(LoaderMsg const& inMsg, std::unique_ptr<pro
_waitingKeyLookupMap.erase(iter);
}
keyLookupOneShot->keyInfoComplete(key, chunkInfo.chunk, chunkInfo.subchunk, protoData->success());
LOGS(_log, LOG_LVL_INFO, "Successful KEY_LOOKUP key=" << key << " " << chunkInfo);
LOGS(_log, LOG_LVL_WARN, "&&&INFO Successful KEY_LOOKUP key=" << key << " " << chunkInfo);
}


Expand Down Expand Up @@ -173,7 +173,7 @@ void CentralClient::_handleKeyInsertComplete(LoaderMsg const& inMsg, std::unique
mapSize = _waitingKeyInsertMap.size();
}
keyInsertOneShot->keyInsertComplete();
LOGS(_log, LOG_LVL_INFO, "Successful KEY_INSERT_COMPLETE key=" << key << " " << chunkInfo <<
LOGS(_log, LOG_LVL_WARN, "&&&INFO Successful KEY_INSERT_COMPLETE key=" << key << " " << chunkInfo <<
" mapSize=" << mapSize);
}

Expand Down Expand Up @@ -355,7 +355,7 @@ void CentralClient::getWorkerForKey(CompositeKey const& key, std::string& ip, in
auto nAddr = worker->getUdpAddress();
ip = nAddr.ip;
port = nAddr.port;
LOGS(_log, LOG_LVL_DEBUG, "getWorkerForKey " << key << " worker=" << worker);
LOGS(_log, LOG_LVL_DEBUG, "getWorkerForKey " << key << " worker=" << *worker);
} else {
ip = getDefWorkerHost();
port = getDefWorkerPortUdp();
Expand Down
96 changes: 19 additions & 77 deletions core/modules/loader/CentralWorker.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include "loader/WorkerConfig.h"
#include "proto/loader.pb.h"
#include "proto/ProtoImporter.h"
#include "util/Timer.h" // &&&


// LSST headers
Expand Down Expand Up @@ -126,6 +127,7 @@ void CentralWorker::_monitor() {
usleep(500000);
return;
}
LOGS(_log, LOG_LVL_WARN, "&&& CentralWorker::_monitor A");

// If data gets shifted, check everything again as ranges will have
// changed and there may be a lot more data to shift.
Expand Down Expand Up @@ -191,6 +193,7 @@ void CentralWorker::_monitor() {
_sendWorkerKeysInfo(masterAddr, getNextMsgId());
}
} while (dataShifted);
LOGS(_log, LOG_LVL_WARN, "&&& CentralWorker::_monitor Z");
}


Expand Down Expand Up @@ -667,80 +670,6 @@ void CentralWorker::cancelShiftsWithLeftNeighbor() {
}
}

/* &&&
bool CentralWorker::workerInfoReceive(BufferUdp::Ptr const& data) {
// Open the data protobuffer and add it to our list.
StringElement::Ptr sData = std::dynamic_pointer_cast<StringElement>(MsgElement::retrieve(*data, " CentralWorker::workerInfoReceive&&& "));
if (sData == nullptr) {
LOGS(_log, LOG_LVL_WARN, "CentralWorker::workerInfoRecieve Failed to parse list");
return false;
}
std::unique_ptr<proto::WorkerListItem> protoList = sData->protoParse<proto::WorkerListItem>();
if (protoList == nullptr) {
LOGS(_log, LOG_LVL_WARN, "CentralWorker::workerInfoRecieve Failed to parse list");
return false;
}
// TODO: move this call to another thread
_workerInfoReceive(protoList);
return true;
}
*/

/* &&&
void CentralWorker::_workerInfoReceive(std::unique_ptr<proto::WorkerListItem>& protoL) {
std::unique_ptr<proto::WorkerListItem> protoList(std::move(protoL));
// Check the information, if it is our network address, set or check our id.
// Then compare it with the map, adding new/changed information.
uint32_t wId = protoList->wid();
std::string ipUdp("");
int portUdp = 0;
int portTcp = 0;
if (protoList->has_address()) {
proto::LdrNetAddress protoAddr = protoList->address();
ipUdp = protoAddr.ip();
portUdp = protoAddr.udpport();
portTcp = protoAddr.tcpport();
}
KeyRange strRange;
if (protoList->has_range()) {
proto::WorkerRange protoRange = protoList->range();
bool valid = protoRange.valid();
if (valid) {
CompositeKey min(protoRange.minint(), protoRange.minstr());
CompositeKey max(protoRange.maxint(), protoRange.maxstr());
bool unlimited = protoRange.maxunlimited();
strRange.setMinMax(min, max, unlimited);
}
}
// If the address matches ours, check the name.
if (getHostName() == ipUdp && getUdpPort() == portUdp) {
if (_isOurIdInvalid()) {
LOGS(_log, LOG_LVL_INFO, "Setting our name " << wId);
_setOurId(wId);
} else if (getOurId() != wId) {
LOGS(_log, LOG_LVL_ERROR, "Our wId doesn't match address from master! wId=" <<
getOurId() << " from master=" << wId);
}
// It is this worker. If there is a valid range in the message and our range is not valid,
// take the range given as our own.
if (strRange.getValid()) {
std::lock_guard<std::mutex> lckM(_idMapMtx);
if (not _keyRange.getValid()) {
LOGS(_log, LOG_LVL_INFO, "Setting our range " << strRange);
_keyRange.setMinMax(strRange.getMin(), strRange.getMax(), strRange.getUnlimited());
}
}
}
// Make/update entry in map.
_wWorkerList->updateEntry(wId, ipUdp, portUdp, portTcp, strRange);
}
*/


void CentralWorker::checkForThisWorkerValues(uint32_t wId, std::string const& ip,
int portUdp, int portTcp, KeyRange& strRange) {
Expand Down Expand Up @@ -826,6 +755,9 @@ bool CentralWorker::workerKeyInsertReq(LoaderMsg const& inMsg, BufferUdp::Ptr co
}


util::Timer lastInsertTimer; // &&&
std::mutex lastInsertTimerMtx; // &&&

void CentralWorker::_workerKeyInsertReq(LoaderMsg const& inMsg, std::unique_ptr<proto::KeyInfoInsert>& protoBuf) {
std::unique_ptr<proto::KeyInfoInsert> protoData(std::move(protoBuf));

Expand All @@ -850,7 +782,17 @@ void CentralWorker::_workerKeyInsertReq(LoaderMsg const& inMsg, std::unique_ptr<
// Element already found, check file id and row number. Bad if not the same.
// TODO HIGH send back duplicate key mismatch message to the original requester and return
}
LOGS(_log, LOG_LVL_INFO, "Key inserted=" << key << "(" << chunkInfo << ")");
{
std::lock_guard<std::mutex> tLg(lastInsertTimerMtx);
lastInsertTimer.stop();
auto elapsedInsert = lastInsertTimer.getElapsed();
if (elapsedInsert > 0.5) {
LOGS(_log, LOG_LVL_ERROR, "&&& Longdelay key=" << key << " dlay=" << elapsedInsert);
}
// &&& LOGS(_log, LOG_LVL_INFO, "Key inserted=" << key << "(" << chunkInfo << ")");
LOGS(_log, LOG_LVL_WARN, "&&&INFO Key inserted=" << key << "(" << chunkInfo << ") dlay=" << elapsedInsert);
lastInsertTimer.start();
}
// TODO Send this item to the keyLogger (which would then send KEY_INSERT_COMPLETE back to the requester),
// for now this function will send the message back for proof of concept.
LoaderMsg msg(LoaderMsg::KEY_INSERT_COMPLETE, inMsg.msgId->element, getHostName(), getUdpPort());
Expand Down Expand Up @@ -898,10 +840,10 @@ void CentralWorker::_forwardKeyInsertRequest(NetworkAddress const& targetAddr, L
// The proto buffer should be the same, just need a new message.
int hops = protoData->hops() + 1;
if (hops > 4) { // TODO replace magic number with variable set via config file.
LOGS(_log, LOG_LVL_INFO, "Too many hops, dropping insert request hops=" << hops << " key=" << key);
LOGS(_log, LOG_LVL_WARN, "Too many hops, dropping insert request hops=" << hops << " key=" << key);
return;
}
LOGS(_log, LOG_LVL_INFO, "Forwarding key insert hops=" << hops << " key=" << key);
LOGS(_log, LOG_LVL_WARN, "&&&INFO Forwarding key insert hops=" << hops << " key=" << key);
LoaderMsg msg(LoaderMsg::KEY_INSERT_REQ, inMsg.msgId->element, getHostName(), getUdpPort());
BufferUdp msgData;
msg.appendToData(msgData);
Expand Down
5 changes: 4 additions & 1 deletion core/modules/loader/DoListItem.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ class DoListItem : public std::enable_shared_from_this<DoListItem> {

virtual ~DoListItem() = default;

util::CommandTracked::Ptr runIfNeeded(TimeOut::TimePoint now);
/* &&&
util::CommandTracked::Ptr runIfNeeded(TimeOut::TimePoint now) {
std::lock_guard<std::mutex> lock(_mtx);
if (_command == nullptr) {
Expand All @@ -117,6 +119,7 @@ class DoListItem : public std::enable_shared_from_this<DoListItem> {
}
return nullptr;
}
*/

bool isAlreadyOnList() { return _addedToList; }

Expand Down Expand Up @@ -177,7 +180,7 @@ class DoListItem : public std::enable_shared_from_this<DoListItem> {
/// If no info is needed, check for info after this period of time.
TimeOut _timeOut{std::chrono::minutes(5)};
/// Rate limiter, no more than 1 message every few seconds
TimeOut _timeRateLimit{std::chrono::milliseconds(1500)}; // TODO: DM-17453 set via config
TimeOut _timeRateLimit{std::chrono::milliseconds(7500)}; // TODO: DM-17453 set via config
util::CommandTracked::Ptr _command;
std::mutex _mtx; ///< protects _timeOut, _timeRequest, _command, _oneShot, _needInfo
/// Number of times the command needed to be created. It's only tracked for oneShots as
Expand Down
2 changes: 1 addition & 1 deletion core/modules/loader/ServerTcpBase.cc
Original file line number Diff line number Diff line change
Expand Up @@ -443,7 +443,7 @@ void TcpBaseConnection::_handleImYourLNeighbor1(boost::system::error_code const&
CompositeKey maxKey(protoRange.maxint(), protoRange.maxstr());
bool unlimited = protoRange.maxunlimited();
leftRange.setMinMax(minKey, maxKey, unlimited);
LOGS(_log, LOG_LVL_WARN, funcName << " leftRange=" << leftRange);
LOGS(_log, LOG_LVL_INFO, funcName << " leftRange=" << leftRange);
newLeftRange = _serverTcpBase->getCentralWorker()->updateRangeWithLeftData(leftRange);
}
proto::Neighbor protoLeftNeigh = protoItem->left();
Expand Down
15 changes: 14 additions & 1 deletion core/modules/loader/ServerUdpBase.cc
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ void ServerUdpBase::_receiveCallback(boost::system::error_code const& error, siz


void ServerUdpBase::_sendResponse() {
LOGS(_log, LOG_LVL_INFO, "&&&udp:_sendResponse");
_socket.async_send_to(boost::asio::buffer(_sendData->getReadCursor(), _sendData->getBytesLeftToRead()),
_senderEndpoint,
[this](boost::system::error_code const& ec, std::size_t bytes_transferred) {
Expand All @@ -87,7 +88,16 @@ void ServerUdpBase::sendBufferTo(std::string const& hostName, int port, BufferUd
using namespace boost::asio;
LOGS(_log, LOG_LVL_DEBUG, "ServerUdpBase::sendBufferTo hostName=" << hostName << " port=" << port);
try {
ip::udp::endpoint dest = resolve(hostName, port);
NetworkAddress addr(hostName, port);
ip::udp::endpoint dest;
auto iter = _resolvMap.find(addr);
if (iter == _resolvMap.end()) {
dest = resolve(hostName, port); // may throw boost::system::system_error
_resolvMap[addr] = dest;
} else {
dest = iter->second;
}
// &&& ip::udp::endpoint dest = resolve(hostName, port);
_socket.send_to(buffer(sendBuf.getReadCursor(), sendBuf.getBytesLeftToRead()), dest);
} catch (boost::system::system_error const& e) {
LOGS(_log, LOG_LVL_ERROR, "ServerUdpBase::sendBufferTo boost system_error=" << e.what() <<
Expand All @@ -114,6 +124,7 @@ void ServerUdpBase::_sendCallback(const boost::system::error_code& error, size_t
}

void ServerUdpBase::_receivePrepare() {
LOGS(_log, LOG_LVL_INFO, "&&&udp:_receivePrepare");
_data = std::make_shared<BufferUdp>(); // New buffer for next response, the old buffer
// may still be in use elsewhere.
_socket.async_receive_from(boost::asio::buffer(_data->getWriteCursor(),
Expand All @@ -130,8 +141,10 @@ boost::asio::ip::udp::endpoint ServerUdpBase::resolve(std::string const& hostNam
using namespace boost::asio;
// Resolver returns an iterator. This uses the first item only.
// Failure to resolve anything throws a boost::system::error.
LOGS(_log, LOG_LVL_INFO, "&&&udp:resolve a");
ip::udp::endpoint dest =
*_resolver.resolve(ip::udp::v4(), hostName, std::to_string(port)).begin();
LOGS(_log, LOG_LVL_INFO, "&&&udp:resolve b");
return dest;
}

Expand Down
5 changes: 5 additions & 0 deletions core/modules/loader/ServerUdpBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@

// Qserv headers
#include "loader/BufferUdp.h"
#include "loader/NetworkAddress.h"

namespace lsst {
namespace qserv {
Expand Down Expand Up @@ -87,6 +88,10 @@ class ServerUdpBase {
std::string _hostName;
int _port;

/// Map and mutex to store ip destinations
// TODO: add occasional checks to see if addresses changed
std::map<NetworkAddress, boost::asio::ip::udp::endpoint> _resolvMap;

/// Items for resolving UDP addresses
/// There appear to be concurrency issues even with
/// separate io_contexts, so re-using existing objects.
Expand Down
Loading

0 comments on commit 91805e2

Please sign in to comment.