Skip to content

Commit ff79690

Browse files
committed
Merge pull request #26 from InMobi/connectionReset
fix SCRIBE-21: connection reset to honor retry_interval when collector i...
2 parents 1fe9f35 + d215d6f commit ff79690

File tree

2 files changed

+21
-31
lines changed

2 files changed

+21
-31
lines changed

src/store.cpp

Lines changed: 20 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1868,7 +1868,7 @@ NetworkStore::NetworkStore(StoreQueue* storeq,
18681868
baseResetInterval(0),
18691869
resetIntervalRange(0),
18701870
resetInterval(0),
1871-
lastResetTime(0) {
1871+
lastOpenedTime(0) {
18721872
// we can't open the connection until we get configured
18731873

18741874
// the bool for opened ensures that we don't make duplicate
@@ -1946,15 +1946,9 @@ void NetworkStore::configure(pStoreConf configuration, pStoreConf parent) {
19461946
if (configuration->getString("reset_interval_range", temp)) {
19471947
resetIntervalRange = getTimeInSeconds(temp);
19481948
}
1949-
// if connection reset is enabled, initialize reset settings
19501949
if (baseResetInterval > 0) {
19511950
LOG_OPER("[%s] Base connection reset interval: [%d] seconds, reset interval range: [%d] seconds",
19521951
categoryHandled.c_str(), (int) baseResetInterval, (int) resetIntervalRange);
1953-
lastResetTime = time(NULL);
1954-
// if reset interval range is <=0, then ignore it
1955-
resetInterval = (resetIntervalRange <= 0) ? baseResetInterval : baseResetInterval + rand() % resetIntervalRange;
1956-
LOG_OPER("[%s] Next connection reset interval is set to: [%d] seconds",
1957-
categoryHandled.c_str(), (int) resetInterval);
19581952
}
19591953
}
19601954

@@ -2014,26 +2008,22 @@ void NetworkStore::periodicCheck() {
20142008
}
20152009
}
20162010

2017-
// if reset interval is configured, check if connection needs to be reset now
2018-
if (resetInterval > 0) {
2011+
// if connection exists and reset is configured, check if connection needs to be reset
2012+
if (opened && resetInterval > 0) {
20192013
time_t now = time(NULL);
2020-
if (now - lastResetTime >= resetInterval) {
2021-
LOG_OPER("[%s] Connection reset: closing old connection", categoryHandled.c_str());
2014+
if (now - lastOpenedTime >= resetInterval) {
2015+
LOG_OPER("[%s] Connection reset: closing existing connection", categoryHandled.c_str());
20222016
// close the existing connection
20232017
close();
20242018

2025-
// open a new connection
2019+
// open a new connection. If open() fails for any reason, no further reset will trigger until
2020+
// the connection is reopened by retry_interval attempts. If the network store is contained within
2021+
// a buffer store, the latter will detect the broken connection when handleMessages() fails to write
2022+
// messages over it and will change its state to DISCONNECTED. The periodicCheck()
2023+
// of the buffer store will subsequently perform retry attempts to open a new connection.
20262024
if (open()) {
20272025
// successfully opened
20282026
LOG_OPER("[%s] Connection reset: opened new connection", categoryHandled.c_str());
2029-
2030-
// update lastResetTime and resetInterval
2031-
lastResetTime = now;
2032-
// if reset interval range is <=0, then ignore it
2033-
resetInterval = (resetIntervalRange <= 0) ? baseResetInterval : baseResetInterval + rand() % resetIntervalRange;
2034-
LOG_OPER("[%s] Next connection reset interval is set to: [%d] seconds",
2035-
categoryHandled.c_str(), (int) resetInterval);
2036-
20372027
//increment counter for number of reset connections
20382028
g_Handler->incCounter(categoryHandled, "connection_resets");
20392029
}
@@ -2112,6 +2102,16 @@ bool NetworkStore::open() {
21122102
} else {
21132103
setStatus("Failed to connect");
21142104
}
2105+
2106+
// if connection is opened and reset is configured, update reset settings
2107+
if (opened && baseResetInterval > 0) {
2108+
lastOpenedTime = time(NULL);
2109+
// if reset interval range is <=0, then ignore it
2110+
resetInterval = (resetIntervalRange <= 0) ? baseResetInterval :
2111+
baseResetInterval + rand() % resetIntervalRange;
2112+
LOG_OPER("[%s] Next connection reset interval is set to: [%d] seconds",
2113+
categoryHandled.c_str(), (int) resetInterval);
2114+
}
21152115
return opened;
21162116
}
21172117

@@ -2152,16 +2152,6 @@ shared_ptr<Store> NetworkStore::copy(const std::string &category) {
21522152
store->baseResetInterval = baseResetInterval;
21532153
store->resetIntervalRange = resetIntervalRange;
21542154

2155-
if (baseResetInterval > 0) {
2156-
LOG_OPER("[%s] Base connection reset interval: [%d] seconds, reset interval range: [%d] seconds",
2157-
category.c_str(), (int) baseResetInterval, (int) resetIntervalRange);
2158-
// if reset interval range is <=0, then ignore it
2159-
store->resetInterval = (resetIntervalRange <= 0) ? baseResetInterval : baseResetInterval + rand() % resetIntervalRange;
2160-
store->lastResetTime = time(NULL);
2161-
LOG_OPER("[%s] Next connection reset interval is set to: [%d] seconds",
2162-
category.c_str(), (int) store->resetInterval);
2163-
}
2164-
21652155
return copied;
21662156
}
21672157

src/store.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -429,7 +429,7 @@ class NetworkStore : public Store {
429429
time_t baseResetInterval; // base connection reset time in seconds
430430
time_t resetIntervalRange; // random backoff interval in seconds
431431
time_t resetInterval; // current reset interval calculated in seconds
432-
time_t lastResetTime; // time when the connection was last reset
432+
time_t lastOpenedTime; // time when the connection was last opened
433433

434434
private:
435435
// disallow copy, assignment, and empty construction

0 commit comments

Comments
 (0)