Merge branch 'feature/KASM-6788-threading_2' into 'master'

KASM-6788 Refactor EncodeManager to use std::execution with parallel...

Closes KASM-6788

See merge request kasm-technologies/internal/KasmVNC!168
This commit is contained in:
Matthew McClaskey 2025-04-09 13:57:45 +00:00
commit dc75e98344
10 changed files with 129 additions and 86 deletions

View File

@ -21,7 +21,7 @@ include(CheckCSourceRuns)
include(CMakeMacroLibtoolFile)
project(kasmvnc)
project(kasmvnc LANGUAGES C CXX)
set(VERSION 0.9)
# The RC version must always be four comma-separated numbers
@ -74,13 +74,10 @@ set(CMAKE_CXX_FLAGS_MINSIZEREL "${CMAKE_CXX_FLAGS_MINSIZEREL} -UNDEBUG")
# Make sure we get a sane C version
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=gnu99")
set(CMAKE_CXX_STANDARD 20)
# Enable OpenMP
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fopenmp")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
# Enable C++ 11
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=gnu++11")
# Tell the compiler to be stringent
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wformat=2")

View File

@ -12,9 +12,22 @@ RUN apt-get update && \
RUN DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends tzdata
RUN apt-get update && apt-get -y build-dep xorg-server libxfont-dev
RUN apt-get update && apt-get -y install cmake git libgnutls28-dev vim wget tightvncserver curl
RUN apt-get update && apt-get -y install git libgnutls28-dev vim wget tightvncserver curl
RUN apt-get update && apt-get -y install libpng-dev libtiff-dev libgif-dev libavcodec-dev libssl-dev libxrandr-dev libxcursor-dev
# Install CMake 3.22.0 into /usr/local from the self-extracting installer
# published on cmake.org. The installer variant (x86_64 or aarch64) is picked
# from the output of `arch`; any other architecture aborts the image build.
# NOTE(review): the downloaded installer is executed without any checksum or
# signature verification — consider pinning a SHA-256.
RUN CMAKE_URL="https://cmake.org/files/v3.22/cmake-3.22.0" && \
ARCH=$(arch) && \
if [ "$ARCH" = "x86_64" ]; then \
CMAKE_URL="${CMAKE_URL}-linux-x86_64.sh"; \
elif [ "$ARCH" = "aarch64" ]; then \
CMAKE_URL="${CMAKE_URL}-linux-aarch64.sh"; \
else \
echo "Unsupported architecture: $ARCH" && exit 1; \
fi && \
curl -fsSL $CMAKE_URL -o cmake.sh && \
(echo y; echo n) | bash cmake.sh --prefix=/usr/local --skip-license && \
rm cmake.sh
ENV SCRIPTS_DIR=/tmp/scripts
COPY builder/scripts $SCRIPTS_DIR
RUN $SCRIPTS_DIR/build-webp

View File

@ -13,8 +13,8 @@ RUN zypper install -ny \
ffmpeg-4-libavcodec-devel \
fonttosfnt \
font-util \
gcc \
gcc-c++ \
gcc14 \
gcc14-c++ \
giflib-devel \
git \
gzip \
@ -45,17 +45,30 @@ RUN zypper install -ny \
xorg-x11-util-devel \
zlib-devel
ENV SCRIPTS_DIR=/tmp/scripts
COPY builder/scripts $SCRIPTS_DIR
RUN $SCRIPTS_DIR/build-webp
RUN $SCRIPTS_DIR/build-libjpeg-turbo
RUN update-alternatives --install /usr/bin/c++ c++ /usr/bin/g++-14 100
RUN useradd -u 1000 docker && \
groupadd -g 1000 docker && \
usermod -a -G docker docker
# Install CMake 3.22.0 into /usr/local from the self-extracting installer
# published on cmake.org. The installer variant (x86_64 or aarch64) is picked
# from the output of `arch`; any other architecture aborts the image build.
# NOTE(review): the downloaded installer is executed without any checksum or
# signature verification — consider pinning a SHA-256.
RUN ARCH=$(arch) && \
CMAKE_URL="https://cmake.org/files/v3.22/cmake-3.22.0" && \
if [ "$ARCH" = "x86_64" ]; then \
CMAKE_URL="${CMAKE_URL}-linux-x86_64.sh"; \
elif [ "$ARCH" = "aarch64" ]; then \
CMAKE_URL="${CMAKE_URL}-linux-aarch64.sh"; \
else \
echo "Unsupported architecture: $ARCH" && exit 1; \
fi && \
curl -fsSL $CMAKE_URL -o cmake.sh && \
(echo y; echo n) | bash cmake.sh --prefix=/usr/local --skip-license && \
rm cmake.sh
ENV SCRIPTS_DIR=/tmp/scripts
COPY builder/scripts $SCRIPTS_DIR
RUN $SCRIPTS_DIR/build-webp && $SCRIPTS_DIR/build-libjpeg-turbo
COPY --chown=docker:docker . /src/
USER docker
ENTRYPOINT ["/src/builder/build.sh"]
ENTRYPOINT ["bash", "-l", "-c", "/src/builder/build.sh"]

View File

@ -15,6 +15,7 @@ RUN \
dnf-plugins-core \
gcc \
gcc-c++ \
gcc-toolset-14 \
git \
gnutls-devel \
libjpeg-turbo-devel \
@ -38,6 +39,7 @@ RUN dnf install -y --nogpgcheck https://mirrors.rpmfusion.org/free/el/rpmfusion-
# Install from new repos
RUN dnf install -y \
tbb-devel \
ffmpeg-devel \
giflib-devel \
lbzip2 \
@ -48,16 +50,16 @@ RUN dnf install -y \
xorg-x11-xtrans-devel \
libXrandr-devel \
libXtst-devel \
libXcursor-devel
libXcursor-devel \
libSM-devel
ENV SCRIPTS_DIR=/tmp/scripts
COPY builder/scripts $SCRIPTS_DIR
RUN $SCRIPTS_DIR/build-webp
RUN $SCRIPTS_DIR/build-libjpeg-turbo
RUN useradd -m docker && echo "docker:docker" | chpasswd
RUN echo "source /opt/rh/gcc-toolset-14/enable" > /etc/profile.d/gcc-toolset.sh && \
$SCRIPTS_DIR/build-webp && $SCRIPTS_DIR/build-libjpeg-turbo && \
useradd -m docker && echo "docker:docker" | chpasswd
COPY --chown=docker:docker . /src/
USER docker
ENTRYPOINT ["/src/builder/build.sh"]
ENTRYPOINT ["bash", "-l", "-c", "/src/builder/build.sh"]

View File

@ -15,6 +15,7 @@ RUN \
dnf-plugins-core \
gcc \
gcc-c++ \
gcc-toolset-14 \
git \
gnutls-devel \
libjpeg-turbo-devel \
@ -47,17 +48,16 @@ RUN dnf install -y \
xorg-x11-xtrans-devel \
libXrandr-devel \
libXtst-devel \
libXcursor-devel
libXcursor-devel \
libSM-devel
ENV SCRIPTS_DIR=/tmp/scripts
COPY builder/scripts $SCRIPTS_DIR
RUN $SCRIPTS_DIR/build-webp
RUN $SCRIPTS_DIR/build-libjpeg-turbo
RUN useradd -m docker && echo "docker:docker" | chpasswd
RUN echo "source /opt/rh/gcc-toolset-14/enable" > /etc/profile.d/gcc-toolset.sh && \
$SCRIPTS_DIR/build-webp && $SCRIPTS_DIR/build-libjpeg-turbo && \
useradd -m docker && echo "docker:docker" | chpasswd
COPY --chown=docker:docker . /src/
USER docker
ENTRYPOINT ["/src/builder/build.sh"]
ENTRYPOINT ["bash", "-l", "-c", "/src/builder/build.sh"]

View File

@ -13,7 +13,7 @@ RUN apt-get update && \
RUN apt-get update && apt-get install -y --no-install-recommends tzdata
RUN apt-get update && apt-get -y build-dep xorg-server libxfont-dev
RUN apt-get update && apt-get -y install cmake git vim wget curl
RUN apt-get update && apt-get -y install libpng-dev libtiff-dev libgif-dev libavcodec-dev libssl-dev libxrandr-dev libxcursor-dev
RUN apt-get update && apt-get -y install libtbb-dev libpng-dev libtiff-dev libgif-dev libavcodec-dev libssl-dev libxrandr-dev libxcursor-dev
ENV SCRIPTS_DIR=/tmp/scripts
COPY builder/scripts $SCRIPTS_DIR
@ -22,6 +22,19 @@ RUN $SCRIPTS_DIR/build-libjpeg-turbo
RUN useradd -m docker && echo "docker:docker" | chpasswd && adduser docker sudo
# Install CMake 3.22.0 into /usr/local from the self-extracting installer
# published on cmake.org. The installer variant (x86_64 or aarch64) is picked
# from the output of `arch`; any other architecture aborts the image build.
# NOTE(review): the downloaded installer is executed without any checksum or
# signature verification — consider pinning a SHA-256.
RUN ARCH=$(arch) && \
CMAKE_URL="https://cmake.org/files/v3.22/cmake-3.22.0" && \
if [ "$ARCH" = "x86_64" ]; then \
CMAKE_URL="${CMAKE_URL}-linux-x86_64.sh"; \
elif [ "$ARCH" = "aarch64" ]; then \
CMAKE_URL="${CMAKE_URL}-linux-aarch64.sh"; \
else \
echo "Unsupported architecture: $ARCH" && exit 1; \
fi && \
curl -fsSL $CMAKE_URL -o cmake.sh && \
(echo y; echo n) | bash cmake.sh --prefix=/usr/local --skip-license && \
rm cmake.sh
COPY --chown=docker:docker . /src/
USER docker

View File

@ -3,7 +3,7 @@ FROM ubuntu:focal
ENV DEBIAN_FRONTEND noninteractive
RUN apt-get update && \
apt-get -y install vim build-essential devscripts equivs
apt-get -y install vim build-essential devscripts equivs libtbb-dev
# Install build-deps for the package.
COPY ./debian/control /tmp

View File

@ -82,6 +82,12 @@ endif(WIN32)
set(RFB_LIBRARIES ${JPEG_LIBRARIES} ${PNG_LIBRARIES} os rdr Xregion)
# Query the host distribution's identification data; each field is stored in a
# DISTRO_<KEY> variable (e.g. DISTRO_PLATFORM_ID, tested below).
# The DISTRIB_INFO query key requires CMake 3.22 or newer.
cmake_host_system_information(RESULT DISTRO QUERY DISTRIB_INFO)
# Append tbb to the RFB link libraries when compiling with GCC older than 10,
# or when the host platform id identifies an EL8 distribution.
# NOTE(review): presumably this backs the parallel algorithms pulled in via
# <execution> on those toolchains — confirm the condition covers every
# configuration that needs TBB at link time.
if ((CMAKE_CXX_COMPILER_ID MATCHES "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 10) OR
(DISTRO_PLATFORM_ID MATCHES "platform:el8"))
set(RFB_LIBRARIES ${RFB_LIBRARIES} tbb)
endif ()
if(HAVE_PAM)
set(RFB_SOURCES ${RFB_SOURCES} UnixPasswordValidator.cxx
UnixPasswordValidator.h pam.c pam.h)

View File

@ -19,8 +19,7 @@
* USA.
*/
#include <omp.h>
#include <stdlib.h>
#include <cstdlib>
#include <rfb/cpuid.h>
#include <rfb/EncCache.h>
@ -44,6 +43,7 @@
#include <rfb/TightJPEGEncoder.h>
#include <rfb/TightWEBPEncoder.h>
#include <rfb/TightQOIEncoder.h>
#include <execution>
using namespace rfb;
@ -93,9 +93,9 @@ struct RectInfo {
};
struct QualityInfo {
struct timeval lastUpdate;
struct timeval lastUpdate{};
Rect rect;
unsigned score;
unsigned score{};
};
};
@ -880,14 +880,12 @@ void EncodeManager::findSolidRect(const Rect& rect, Region *changed,
}
}
void EncodeManager::checkWebpFallback(const struct timeval *start) {
void EncodeManager::checkWebpFallback(const timeval *start) {
// Have we taken too long for the frame? If so, drop from WEBP to JPEG
if (start && activeEncoders[encoderFullColour] == encoderTightWEBP && !webpTookTooLong) {
unsigned us;
us = msSince(start) * 1024;
if (start && activeEncoders[encoderFullColour] == encoderTightWEBP && !webpTookTooLong.load(std::memory_order_relaxed)) {
const auto us = msSince(start) * 1024;
if (us > webpFallbackUs)
#pragma omp atomic
webpTookTooLong |= true;
webpTookTooLong.store(true, std::memory_order_relaxed);
}
}
@ -1122,18 +1120,13 @@ void EncodeManager::writeRects(const Region& changed, const PixelBuffer* pb,
const bool mainScreen)
{
std::vector<Rect> rects, subrects, scaledrects;
std::vector<Rect>::const_iterator rect;
std::vector<uint8_t> encoderTypes;
std::vector<uint8_t> isWebp, fromCache;
std::vector<Palette> palettes;
std::vector<std::vector<uint8_t> > compresseds;
std::vector<uint32_t> ms;
uint32_t i;
if (rfb::Server::rectThreads > 0)
omp_set_num_threads(rfb::Server::rectThreads);
webpTookTooLong = false;
webpTookTooLong.store(false, std::memory_order_relaxed);
changed.get_rects(&rects);
// Update stats
@ -1148,18 +1141,18 @@ void EncodeManager::writeRects(const Region& changed, const PixelBuffer* pb,
subrects.reserve(rects.size() * 1.5f);
for (rect = rects.begin(); rect != rects.end(); ++rect) {
int w, h, sw, sh;
for (const auto& rect : rects) {
int sw, sh;
Rect sr;
w = rect->width();
h = rect->height();
const auto w = rect.width();
const auto h = rect.height();
// No split necessary?
if ((((w*h) < SubRectMaxArea) && (w < SubRectMaxWidth)) ||
(videoDetected && !encoders[encoderTightWEBP]->isSupported())) {
subrects.push_back(*rect);
trackRectQuality(*rect);
subrects.push_back(rect);
trackRectQuality(rect);
continue;
}
@ -1170,15 +1163,15 @@ void EncodeManager::writeRects(const Region& changed, const PixelBuffer* pb,
sh = SubRectMaxArea / sw;
for (sr.tl.y = rect->tl.y; sr.tl.y < rect->br.y; sr.tl.y += sh) {
for (sr.tl.y = rect.tl.y; sr.tl.y < rect.br.y; sr.tl.y += sh) {
sr.br.y = sr.tl.y + sh;
if (sr.br.y > rect->br.y)
sr.br.y = rect->br.y;
if (sr.br.y > rect.br.y)
sr.br.y = rect.br.y;
for (sr.tl.x = rect->tl.x; sr.tl.x < rect->br.x; sr.tl.x += sw) {
for (sr.tl.x = rect.tl.x; sr.tl.x < rect.br.x; sr.tl.x += sw) {
sr.br.x = sr.tl.x + sw;
if (sr.br.x > rect->br.x)
sr.br.x = rect->br.x;
if (sr.br.x > rect.br.x)
sr.br.x = rect.br.x;
subrects.push_back(sr);
trackRectQuality(sr);
@ -1186,13 +1179,18 @@ void EncodeManager::writeRects(const Region& changed, const PixelBuffer* pb,
}
}
encoderTypes.resize(subrects.size());
isWebp.resize(subrects.size());
fromCache.resize(subrects.size());
palettes.resize(subrects.size());
compresseds.resize(subrects.size());
scaledrects.resize(subrects.size());
ms.resize(subrects.size());
// Cache the subrect count and build the index sequence 0 .. subrects_size-1,
// which is later iterated by std::for_each to address the per-rect vectors.
// NOTE(review): std::iota is declared in <numeric> — confirm that header is
// included rather than relying on a transitive include.
const size_t subrects_size = subrects.size();
std::vector<size_t> indices(subrects_size);
std::iota(std::begin(indices), std::end(indices), 0);
encoderTypes.resize(subrects_size);
isWebp.resize(subrects_size);
fromCache.resize(subrects_size);
palettes.resize(subrects_size);
compresseds.resize(subrects_size);
scaledrects.resize(subrects_size);
ms.resize(subrects_size);
// In case the current resolution is above the max video res, and video was detected,
// scale to that res, keeping aspect ratio
@ -1224,7 +1222,7 @@ void EncodeManager::writeRects(const Region& changed, const PixelBuffer* pb,
break;
}
for (i = 0; i < subrects.size(); ++i) {
for (uint32_t i = 0; i < subrects_size; ++i) {
const Rect old = scaledrects[i] = subrects[i];
scaledrects[i].br.x *= diff;
scaledrects[i].br.y *= diff;
@ -1249,15 +1247,15 @@ void EncodeManager::writeRects(const Region& changed, const PixelBuffer* pb,
}
scalingTime = msSince(&scalestart);
#pragma omp parallel for schedule(dynamic, 1)
for (i = 0; i < subrects.size(); ++i) {
encoderTypes[i] = getEncoderType(subrects[i], pb, &palettes[i], compresseds[i],
&isWebp[i], &fromCache[i],
scaledpb, scaledrects[i], ms[i]);
checkWebpFallback(start);
}
// Determine the encoder type for every subrect index under the par_unseq
// execution policy. The results for index i are stored in slot i of the
// per-rect vectors (encoderTypes, palettes, compresseds, isWebp, fromCache, ms),
// and checkWebpFallback() is called after each rect.
// NOTE(review): par_unseq allows invocations to be interleaved on one thread,
// so the callable must not perform vectorization-unsafe operations (e.g.
// taking locks). Confirm getEncoderType() and checkWebpFallback() satisfy
// this, or switch to std::execution::par.
std::for_each(std::execution::par_unseq, std::begin(indices), std::end(indices), [&](size_t i)
{
encoderTypes[i] = getEncoderType(subrects[i], pb, &palettes[i], compresseds[i],
&isWebp[i], &fromCache[i],
scaledpb, scaledrects[i], ms[i]);
checkWebpFallback(start);
});
for (i = 0; i < subrects.size(); ++i) {
for (uint32_t i = 0; i < subrects_size; ++i) {
if (encoderTypes[i] == encoderFullColour) {
if (isWebp[i])
webpstats.ms += ms[i];
@ -1269,7 +1267,7 @@ void EncodeManager::writeRects(const Region& changed, const PixelBuffer* pb,
if (start) {
encodingTime = msSince(start);
if (vlog.getLevel() >= vlog.LEVEL_DEBUG) {
if (vlog.getLevel() >= rfb::LogWriter::LEVEL_DEBUG) {
framesSinceEncPrint++;
if (maxEncodingTime < encodingTime)
maxEncodingTime = encodingTime;
@ -1284,11 +1282,11 @@ void EncodeManager::writeRects(const Region& changed, const PixelBuffer* pb,
}
}
if (webpTookTooLong)
if (webpTookTooLong.load(std::memory_order_relaxed))
activeEncoders[encoderFullColour] = encoderTightJPEG;
for (i = 0; i < subrects.size(); ++i) {
if (encCache->enabled && compresseds[i].size() && !fromCache[i] &&
for (uint32_t i = 0; i < subrects_size; ++i) {
if (encCache->enabled && !compresseds[i].empty() && !fromCache[i] &&
!encoders[encoderTightQOI]->isSupported()) {
void *tmp = malloc(compresseds[i].size());
memcpy(tmp, &compresseds[i][0], compresseds[i].size());

View File

@ -29,9 +29,9 @@
#include <rfb/Region.h>
#include <rfb/Timer.h>
#include <rfb/UpdateTracker.h>
#include <rfb/util.h>
#include <stdint.h>
#include <atomic>
#include <sys/time.h>
namespace rfb {
@ -50,7 +50,7 @@ namespace rfb {
class EncodeManager: public Timer::Callback {
public:
EncodeManager(SConnection* conn, EncCache *encCache);
~EncodeManager();
~EncodeManager() override;
void logStats();
@ -72,10 +72,10 @@ namespace rfb {
encodingTime = 0;
};
unsigned getEncodingTime() const {
[[nodiscard]] unsigned getEncodingTime() const {
return encodingTime;
};
unsigned getScalingTime() const {
[[nodiscard]] unsigned getScalingTime() const {
return scalingTime;
};
@ -124,7 +124,8 @@ namespace rfb {
uint8_t *fromCache,
const PixelBuffer *scaledpb, const Rect& scaledrect,
uint32_t &ms) const;
virtual bool handleTimeout(Timer* t);
bool handleTimeout(Timer* t) override;
bool checkSolidTile(const Rect& r, const rdr::U8* colourValue,
const PixelBuffer *pb);
@ -199,7 +200,7 @@ namespace rfb {
size_t curMaxUpdateSize;
unsigned webpFallbackUs;
unsigned webpBenchResult;
bool webpTookTooLong;
std::atomic<bool> webpTookTooLong{false};
unsigned encodingTime;
unsigned maxEncodingTime, framesSinceEncPrint;
unsigned scalingTime;
@ -208,14 +209,14 @@ namespace rfb {
class OffsetPixelBuffer : public FullFramePixelBuffer {
public:
OffsetPixelBuffer() {}
virtual ~OffsetPixelBuffer() {}
OffsetPixelBuffer() = default;
~OffsetPixelBuffer() override = default;
void update(const PixelFormat& pf, int width, int height,
const rdr::U8* data_, int stride);
private:
virtual rdr::U8* getBufferRW(const Rect& r, int* stride);
rdr::U8* getBufferRW(const Rect& r, int* stride) override;
};
};