mirror of
https://github.com/kasmtech/KasmVNC.git
synced 2025-01-10 07:58:52 +01:00
485 lines
13 KiB
C++
485 lines
13 KiB
C++
/* Copyright 2009-2018 Pierre Ossman for Cendio AB
|
|
*
|
|
* This is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This software is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this software; if not, write to the Free Software
|
|
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
|
* USA.
|
|
*/
|
|
|
|
/*
|
|
* This code implements congestion control in the same way as TCP in
|
|
* order to avoid excessive latency in the transport. This is needed
|
|
* because "buffer bloat" is unfortunately still a very real problem.
|
|
*
|
|
* The basic principle is TCP Congestion Control (RFC 5618), with the
|
|
* addition of using the TCP Vegas algorithm. The reason we use Vegas
|
|
* is that we run on top of a reliable transport so we need a latency
|
|
* based algorithm rather than a loss based one. There is also a lot of
|
|
* interpolation of values. This is because we have rather horrible
|
|
* granularity in our measurements.
|
|
*
|
|
* We use a simplistic form of slow start in order to ramp up quickly
|
|
* from an idle state. We do not have any persistent threshold though
|
|
* as we have too much noise for it to be reliable.
|
|
*/
|
|
|
|
#include <assert.h>
|
|
#include <sys/time.h>
|
|
|
|
#ifdef __linux__
|
|
#include <sys/ioctl.h>
|
|
#include <sys/socket.h>
|
|
#include <netinet/in.h>
|
|
#include <netinet/tcp.h>
|
|
#include <linux/sockios.h>
|
|
#endif
|
|
|
|
#include <rfb/Congestion.h>
|
|
#include <rfb/LogWriter.h>
|
|
#include <rfb/util.h>
|
|
|
|
// Debug output on what the congestion control is up to
|
|
#undef CONGESTION_DEBUG
|
|
|
|
// Dump socket congestion window debug trace to disk
|
|
#undef CONGESTION_TRACE
|
|
|
|
using namespace rfb;
|
|
|
|
// This window should get us going fairly fast on a decent bandwidth network.
|
|
// If it's too high, it will rapidly be reduced and stay low.
|
|
static const unsigned INITIAL_WINDOW = 16384;
|
|
|
|
// TCP's minimal window is 3*MSS. But since we don't know the MSS, we
|
|
// make a guess at 4 KiB (it's probably a bit higher).
|
|
static const unsigned MINIMUM_WINDOW = 4096;
|
|
|
|
// The current default maximum window for Linux (4 MiB). Should be a good
|
|
// limit for now...
|
|
static const unsigned MAXIMUM_WINDOW = 4194304;
|
|
|
|
// Compare position even when wrapped around
|
|
static inline bool isAfter(unsigned a, unsigned b) {
|
|
return a != b && a - b <= UINT_MAX / 2;
|
|
}
|
|
|
|
static LogWriter vlog("Congestion");
|
|
|
|
Congestion::Congestion() :
|
|
lastPosition(0), extraBuffer(0),
|
|
baseRTT(-1), congWindow(INITIAL_WINDOW), inSlowStart(true),
|
|
safeBaseRTT(-1), measurements(0), minRTT(-1), minCongestedRTT(-1)
|
|
{
|
|
gettimeofday(&lastUpdate, NULL);
|
|
gettimeofday(&lastSent, NULL);
|
|
memset(&lastPong, 0, sizeof(lastPong));
|
|
gettimeofday(&lastPongArrival, NULL);
|
|
gettimeofday(&lastAdjustment, NULL);
|
|
}
|
|
|
|
Congestion::~Congestion()
|
|
{
|
|
}
|
|
|
|
void Congestion::updatePosition(unsigned pos)
|
|
{
|
|
struct timeval now;
|
|
unsigned delta, consumed;
|
|
|
|
gettimeofday(&now, NULL);
|
|
|
|
delta = pos - lastPosition;
|
|
if ((delta > 0) || (extraBuffer > 0))
|
|
lastSent = now;
|
|
|
|
// Idle for too long?
|
|
// We use a very crude RTO calculation in order to keep things simple
|
|
// FIXME: should implement RFC 2861
|
|
if (msBetween(&lastSent, &now) > __rfbmax(baseRTT*2, 100)) {
|
|
|
|
#ifdef CONGESTION_DEBUG
|
|
vlog.debug("Connection idle for %d ms, resetting congestion control",
|
|
msBetween(&lastSent, &now));
|
|
#endif
|
|
|
|
// Close congestion window and redo wire latency measurement
|
|
congWindow = __rfbmin(INITIAL_WINDOW, congWindow);
|
|
baseRTT = -1;
|
|
measurements = 0;
|
|
gettimeofday(&lastAdjustment, NULL);
|
|
minRTT = minCongestedRTT = -1;
|
|
inSlowStart = true;
|
|
}
|
|
|
|
// Commonly we will be in a state of overbuffering. We need to
|
|
// estimate the extra delay that causes so we can separate it from
|
|
// the delay caused by an incorrect congestion window.
|
|
// (we cannot do this until we have a RTT measurement though)
|
|
if (baseRTT != (unsigned)-1) {
|
|
extraBuffer += delta;
|
|
consumed = msBetween(&lastUpdate, &now) * congWindow / baseRTT;
|
|
if (extraBuffer < consumed)
|
|
extraBuffer = 0;
|
|
else
|
|
extraBuffer -= consumed;
|
|
}
|
|
|
|
lastPosition = pos;
|
|
lastUpdate = now;
|
|
}
|
|
|
|
void Congestion::sentPing()
|
|
{
|
|
struct RTTInfo rttInfo;
|
|
|
|
memset(&rttInfo, 0, sizeof(struct RTTInfo));
|
|
|
|
gettimeofday(&rttInfo.tv, NULL);
|
|
rttInfo.pos = lastPosition;
|
|
rttInfo.extra = getExtraBuffer();
|
|
rttInfo.congested = isCongested();
|
|
|
|
pings.push_back(rttInfo);
|
|
}
|
|
|
|
void Congestion::gotPong()
|
|
{
|
|
struct timeval now;
|
|
struct RTTInfo rttInfo;
|
|
unsigned rtt, delay;
|
|
|
|
if (pings.empty())
|
|
return;
|
|
|
|
gettimeofday(&now, NULL);
|
|
|
|
rttInfo = pings.front();
|
|
pings.pop_front();
|
|
|
|
lastPong = rttInfo;
|
|
lastPongArrival = now;
|
|
|
|
rtt = msBetween(&rttInfo.tv, &now);
|
|
if (rtt < 1)
|
|
rtt = 1;
|
|
|
|
// Try to estimate wire latency by tracking lowest seen latency
|
|
if (rtt < baseRTT)
|
|
safeBaseRTT = baseRTT = rtt;
|
|
|
|
// Pings sent before the last adjustment aren't interesting as they
|
|
// aren't a measurement of the current congestion window
|
|
if (isBefore(&rttInfo.tv, &lastAdjustment))
|
|
return;
|
|
|
|
// Estimate added delay because of overtaxed buffers (see above)
|
|
delay = rttInfo.extra * baseRTT / congWindow;
|
|
if (delay < rtt)
|
|
rtt -= delay;
|
|
else
|
|
rtt = 1;
|
|
|
|
// A latency less than the wire latency means that we've
|
|
// understimated the congestion window. We can't really determine
|
|
// how much, so pretend that we got no buffer latency at all.
|
|
if (rtt < baseRTT)
|
|
rtt = baseRTT;
|
|
|
|
// Record the minimum seen delay (hopefully ignores jitter) and let
|
|
// the congestion control do its thing.
|
|
//
|
|
// Note: We are delay based rather than loss based, which means we
|
|
// need to look at pongs even if they weren't limited by the
|
|
// current window ("congested"). Otherwise we will fail to
|
|
// detect increasing congestion until the application exceeds
|
|
// the congestion window.
|
|
if (rtt < minRTT)
|
|
minRTT = rtt;
|
|
if (rttInfo.congested) {
|
|
if (rtt < minCongestedRTT)
|
|
minCongestedRTT = rtt;
|
|
}
|
|
|
|
measurements++;
|
|
updateCongestion();
|
|
}
|
|
|
|
bool Congestion::isCongested()
|
|
{
|
|
if (getInFlight() < congWindow)
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
int Congestion::getUncongestedETA()
|
|
{
|
|
unsigned targetAcked;
|
|
|
|
const struct RTTInfo* prevPing;
|
|
unsigned eta, elapsed;
|
|
unsigned etaNext, delay;
|
|
|
|
std::list<struct RTTInfo>::const_iterator iter;
|
|
|
|
targetAcked = lastPosition - congWindow;
|
|
|
|
// Simple case?
|
|
if (isAfter(lastPong.pos, targetAcked))
|
|
return 0;
|
|
|
|
// No measurements yet?
|
|
if (baseRTT == (unsigned)-1)
|
|
return -1;
|
|
|
|
prevPing = &lastPong;
|
|
eta = 0;
|
|
elapsed = msSince(&lastPongArrival);
|
|
|
|
// Walk the ping queue and figure out which one we are waiting for to
|
|
// get to an uncongested state
|
|
|
|
for (iter = pings.begin(); ;++iter) {
|
|
struct RTTInfo curPing;
|
|
|
|
// If we aren't waiting for a pong that will clear the congested
|
|
// state then we have to estimate the final bit by pretending that
|
|
// we had a ping just after the last position update.
|
|
if (iter == pings.end()) {
|
|
curPing.tv = lastUpdate;
|
|
curPing.pos = lastPosition;
|
|
curPing.extra = extraBuffer;
|
|
} else {
|
|
curPing = *iter;
|
|
}
|
|
|
|
etaNext = msBetween(&prevPing->tv, &curPing.tv);
|
|
// Compensate for buffering delays
|
|
delay = curPing.extra * baseRTT / congWindow;
|
|
etaNext += delay;
|
|
delay = prevPing->extra * baseRTT / congWindow;
|
|
if (delay >= etaNext)
|
|
etaNext = 0;
|
|
else
|
|
etaNext -= delay;
|
|
|
|
// Found it?
|
|
if (isAfter(curPing.pos, targetAcked)) {
|
|
eta += etaNext * (curPing.pos - targetAcked) / (curPing.pos - prevPing->pos);
|
|
if (elapsed > eta)
|
|
return 0;
|
|
else
|
|
return eta - elapsed;
|
|
}
|
|
|
|
assert(iter != pings.end());
|
|
|
|
eta += etaNext;
|
|
prevPing = &*iter;
|
|
}
|
|
}
|
|
|
|
size_t Congestion::getBandwidth()
|
|
{
|
|
// No measurements yet? Guess RTT of 60 ms
|
|
if (safeBaseRTT == (unsigned)-1)
|
|
return congWindow * 1000 / 60;
|
|
|
|
return congWindow * 1000 / safeBaseRTT;
|
|
}
|
|
|
|
void Congestion::debugTrace(const char* filename, int fd)
|
|
{
|
|
#ifdef CONGESTION_TRACE
|
|
#ifdef __linux__
|
|
FILE *f;
|
|
f = fopen(filename, "ab");
|
|
if (f != NULL) {
|
|
struct tcp_info info;
|
|
int buffered;
|
|
socklen_t len;
|
|
len = sizeof(info);
|
|
if ((getsockopt(fd, IPPROTO_TCP,
|
|
TCP_INFO, &info, &len) == 0) &&
|
|
(ioctl(fd, SIOCOUTQ, &buffered) == 0)) {
|
|
struct timeval now;
|
|
gettimeofday(&now, NULL);
|
|
fprintf(f, "%u.%06u,%u,%u,%u,%u\n",
|
|
(unsigned)now.tv_sec, (unsigned)now.tv_usec,
|
|
congWindow, info.tcpi_snd_cwnd * info.tcpi_snd_mss,
|
|
getInFlight(), buffered);
|
|
}
|
|
fclose(f);
|
|
}
|
|
#endif
|
|
#endif
|
|
}
|
|
|
|
unsigned Congestion::getExtraBuffer()
|
|
{
|
|
unsigned elapsed;
|
|
unsigned consumed;
|
|
|
|
if (baseRTT == (unsigned)-1)
|
|
return 0;
|
|
|
|
elapsed = msSince(&lastUpdate);
|
|
consumed = elapsed * congWindow / baseRTT;
|
|
|
|
if (consumed >= extraBuffer)
|
|
return 0;
|
|
else
|
|
return extraBuffer - consumed;
|
|
}
|
|
|
|
unsigned Congestion::getInFlight()
|
|
{
|
|
struct RTTInfo nextPong;
|
|
unsigned etaNext, delay, elapsed, acked;
|
|
|
|
// Simple case?
|
|
if (lastPosition == lastPong.pos)
|
|
return 0;
|
|
|
|
// No measurements yet?
|
|
if (baseRTT == (unsigned)-1) {
|
|
if (!pings.empty())
|
|
return lastPosition - pings.front().pos;
|
|
return 0;
|
|
}
|
|
|
|
// If we aren't waiting for any pong then we have to estimate things
|
|
// by pretending that we had a ping just after the last position
|
|
// update.
|
|
if (pings.empty()) {
|
|
nextPong.tv = lastUpdate;
|
|
nextPong.pos = lastPosition;
|
|
nextPong.extra = extraBuffer;
|
|
} else {
|
|
nextPong = pings.front();
|
|
}
|
|
|
|
// First we need to estimate how many bytes have made it through
|
|
// completely. Look at the next ping that should arrive and figure
|
|
// out how far behind it should be and interpolate the positions.
|
|
|
|
etaNext = msBetween(&lastPong.tv, &nextPong.tv);
|
|
// Compensate for buffering delays
|
|
delay = nextPong.extra * baseRTT / congWindow;
|
|
etaNext += delay;
|
|
delay = lastPong.extra * baseRTT / congWindow;
|
|
if (delay >= etaNext)
|
|
etaNext = 0;
|
|
else
|
|
etaNext -= delay;
|
|
|
|
elapsed = msSince(&lastPongArrival);
|
|
|
|
// The pong should be here any second. Be optimistic and assume
|
|
// we can already use its value.
|
|
if (etaNext <= elapsed)
|
|
acked = nextPong.pos;
|
|
else {
|
|
acked = lastPong.pos;
|
|
acked += (nextPong.pos - lastPong.pos) * elapsed / etaNext;
|
|
}
|
|
|
|
return lastPosition - acked;
|
|
}
|
|
|
|
void Congestion::updateCongestion()
|
|
{
|
|
unsigned diff;
|
|
|
|
// We want at least three measurements to avoid noise
|
|
if (measurements < 3)
|
|
return;
|
|
|
|
assert(minRTT >= baseRTT);
|
|
assert(minCongestedRTT >= baseRTT);
|
|
|
|
// The goal is to have a slightly too large congestion window since
|
|
// a "perfect" one cannot be distinguished from a too small one. This
|
|
// translates to a goal of a few extra milliseconds of delay.
|
|
|
|
diff = minRTT - baseRTT;
|
|
|
|
if (diff > __rfbmax(100, baseRTT/2)) {
|
|
// We have no way of detecting loss, so assume massive latency
|
|
// spike means packet loss. Adjust the window and go directly
|
|
// to congestion avoidance.
|
|
#ifdef CONGESTION_DEBUG
|
|
vlog.debug("Latency spike! Backing off...");
|
|
#endif
|
|
congWindow = congWindow * baseRTT / minRTT;
|
|
inSlowStart = false;
|
|
}
|
|
|
|
if (inSlowStart) {
|
|
// Slow start. Aggressive growth until we see congestion.
|
|
|
|
if (diff > 25) {
|
|
// If we see an increased latency then we assume we've hit the
|
|
// limit and it's time to leave slow start and switch to
|
|
// congestion avoidance
|
|
congWindow = congWindow * baseRTT / minRTT;
|
|
inSlowStart = false;
|
|
} else {
|
|
// It's not safe to increase unless we actually used the entire
|
|
// congestion window, hence we look at minCongestedRTT and not
|
|
// minRTT
|
|
|
|
diff = minCongestedRTT - baseRTT;
|
|
if (diff < 25)
|
|
congWindow *= 2;
|
|
}
|
|
} else {
|
|
// Congestion avoidance (VEGAS)
|
|
|
|
if (diff > 50) {
|
|
// Slightly too fast
|
|
congWindow -= 4096;
|
|
} else {
|
|
// Only the "congested" pongs are checked to see if the
|
|
// window is too small.
|
|
|
|
diff = minCongestedRTT - baseRTT;
|
|
|
|
if (diff < 5) {
|
|
// Way too slow
|
|
congWindow += 8192;
|
|
} else if (diff < 25) {
|
|
// Too slow
|
|
congWindow += 4096;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (congWindow < MINIMUM_WINDOW)
|
|
congWindow = MINIMUM_WINDOW;
|
|
if (congWindow > MAXIMUM_WINDOW)
|
|
congWindow = MAXIMUM_WINDOW;
|
|
|
|
#ifdef CONGESTION_DEBUG
|
|
vlog.debug("RTT: %d/%d ms (%d ms), Window: %d KiB, Bandwidth: %g Mbps%s",
|
|
minRTT, minCongestedRTT, baseRTT, congWindow / 1024,
|
|
congWindow * 8.0 / baseRTT / 1000.0,
|
|
inSlowStart ? " (slow start)" : "");
|
|
#endif
|
|
|
|
measurements = 0;
|
|
gettimeofday(&lastAdjustment, NULL);
|
|
minRTT = minCongestedRTT = -1;
|
|
}
|
|
|