Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/parameters.md
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ There are also additional environment variables that may change the behavior of
| `GIT_OPT_SET_SERVER_CONNECT_TIMEOUT`| `int` | Timeout to attempt connections to a remote server. Default value 4000 ms. |
| `GIT_OPT_SET_SERVER_TIMEOUT` | `int` | Timeout for reading from and writing to a remote server. Default value 4000 ms. |
| `GIT_OPT_SET_SSL_CERT_LOCATIONS` | `string`| Path to check for ssl certificates. |
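| `GIT_OPT_SET_ENABLE_SEARCH_PATHS`| `int` | When set to 1, the pull functionality reads host-level git configuration files such as `~/.gitconfig` and `/etc/gitconfig`. For any other value, or when the variable is unset, these locations are ignored. Default value 0. |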

Task specific parameters for different tasks (text generation/image generation/embeddings/rerank) are listed below:

Expand Down
120 changes: 102 additions & 18 deletions src/pull_module/libgit2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
#include "cmd_exec.hpp"
#include "src/filesystem/filesystem.hpp"
#include "src/filesystem/localfilesystem.hpp"
#include "src/utils/env_guard.hpp"
#include "../logging.hpp"
#include "../shutdown_state.hpp"
#include "../stringutils.hpp"
Expand Down Expand Up @@ -280,6 +281,43 @@ Libgt2InitGuard::Libgt2InitGuard(const Libgit2Options& opts) {
SPDLOG_DEBUG("Initializing libgit2");
this->status = git_libgit2_init();
IF_ERROR_SET_MSG_AND_RETURN();
// Disable ownership check so repositories owned by a different OS user can be
// opened for reading (equivalent to git's `safe.directory = *`). This is safe
// in a serving context where the operator intentionally mounts model directories
// that may have been downloaded by a different UID (e.g. root in the build
// container vs. a non-root serving user).
this->status = git_libgit2_opts(GIT_OPT_SET_OWNER_VALIDATION, 0);
IF_ERROR_SET_MSG_AND_RETURN();
const bool enableGitSearchPath = (GetEnvVar("GIT_OPT_SET_ENABLE_SEARCH_PATHS") == "1");
// By default, redirect all git config search paths to an empty string so libgit2
// never reads host-level git configuration (~/.gitconfig, /etc/gitconfig, etc.).
// Without this, a host gitconfig that sets credential.helper, http.proxy, lfs.*, or
// safe.directory can silently override OVMS's intended proxy/token settings and cause
// spurious failures or credential leaks in multi-tenant environments.
// To preserve historic behaviour or troubleshoot host gitconfig interactions, set
// GIT_OPT_SET_ENABLE_SEARCH_PATHS=1 and skip this isolation step.
if (!enableGitSearchPath) {
this->status = git_libgit2_opts(GIT_OPT_SET_SEARCH_PATH, GIT_CONFIG_LEVEL_SYSTEM, "");
IF_ERROR_SET_MSG_AND_RETURN();
this->status = git_libgit2_opts(GIT_OPT_SET_SEARCH_PATH, GIT_CONFIG_LEVEL_XDG, "");
IF_ERROR_SET_MSG_AND_RETURN();
this->status = git_libgit2_opts(GIT_OPT_SET_SEARCH_PATH, GIT_CONFIG_LEVEL_GLOBAL, "");
IF_ERROR_SET_MSG_AND_RETURN();
#if defined(_WIN32)
// On Windows, GIT_CONFIG_LEVEL_PROGRAMDATA covers %PROGRAMDATA%\Git\config.
// Keep it isolated unless explicit opt-in via GIT_OPT_SET_ENABLE_SEARCH_PATHS=1.
this->status = git_libgit2_opts(GIT_OPT_SET_SEARCH_PATH, GIT_CONFIG_LEVEL_PROGRAMDATA, "");
IF_ERROR_SET_MSG_AND_RETURN();
#endif
Comment thread
atobiszei marked this conversation as resolved.
}
// Skip .keep file existence checks when reading packfiles. libgit2 performs one
// stat() per pack per operation to honour .keep files (which prevent gc from
// collecting referenced packs). In an OVMS deployment the model directory is
// never garbage-collected and may live on NFS or other high-latency remote
// filesystems, so removing this stat() per open noticeably reduces latency on
// resume/status operations against large repositories.
this->status = git_libgit2_opts(GIT_OPT_DISABLE_PACK_KEEP_FILE_CHECKS, 1);
IF_ERROR_SET_MSG_AND_RETURN();
SPDLOG_TRACE("Setting libgit2 server connection timeout:{}", opts.serverConnectTimeoutMs);
this->status = git_libgit2_opts(GIT_OPT_SET_SERVER_CONNECT_TIMEOUT, opts.serverConnectTimeoutMs);
IF_ERROR_SET_MSG_AND_RETURN();
Expand Down Expand Up @@ -1123,6 +1161,7 @@ Status resumeLfsDownloadForFile(git_repository* repo, const char* filePathInRepo
namespace {

struct ResumeCandidates {
bool shouldResume = false;
bool hasWipMarker = false;
bool hasLfsErrorFile = false;
bool interruptionLikely = false;
Expand All @@ -1131,18 +1170,16 @@ struct ResumeCandidates {
};

/**
* Builds resume candidate lists based on interruption markers and repository scan.
* Populates resume candidate lists based on interruption markers and repository scan.
*
* @param repo Pointer to the git repository object.
* @param downloadPath Repository worktree root path.
* @return ResumeCandidates containing LFS and non-LFS recovery targets.
* @param candidates [in/out] Resume candidates to populate; hasWipMarker and hasLfsErrorFile
* must already be set by the caller. On return, lfsMatches, missingNonLfsMatches,
* and interruptionLikely are filled in.
* @note Works on local repository metadata and filesystem; no network operations.
*/
ResumeCandidates buildResumeCandidates(git_repository* repo, const std::string& downloadPath) {
ResumeCandidates candidates;
candidates.hasWipMarker = libgit2::hasLfsWipMarker(downloadPath);
candidates.hasLfsErrorFile = libgit2::hasLfsErrorFile(downloadPath);

void buildResumeCandidates(git_repository* repo, const std::string& downloadPath, ResumeCandidates& candidates) {
// Checking if the download was partially finished for any files in repository,
// including tracked LFS pointer blobs missing from the worktree after abrupt termination.
candidates.lfsMatches = libgit2::findResumableLfsFiles(repo, downloadPath, candidates.hasWipMarker || candidates.hasLfsErrorFile);
Expand All @@ -1151,7 +1188,6 @@ ResumeCandidates buildResumeCandidates(git_repository* repo, const std::string&
}

candidates.interruptionLikely = candidates.hasWipMarker || candidates.hasLfsErrorFile || !candidates.lfsMatches.empty();
return candidates;
}

void printResumeCandidates(const ResumeCandidates& candidates) {
Expand Down Expand Up @@ -1276,14 +1312,26 @@ Status resumeExistingRepository(git_repository* repo,
return StatusCode::OK;
}

Status handleExistingRepositoryWithoutOverwrite(const std::string& downloadPath,
const std::function<Status(bool)>& checkRepositoryStatusFn) {
// If the directory does not contain a .git entry, treat it as a user-provided model directory.
// The user has copied model files in by hand; skip the pull and let model loading proceed
// against whatever files are already on disk. Use --overwrite_models to replace it with a
// fresh download.
/**
 * Decides, from files found under the download path and the already-probed
 * interruption markers, whether resume handling should be attempted.
 *
 * @param downloadPath Directory scanned for LFS-like files.
 * @param candidates Supplies hasWipMarker and hasLfsErrorFile; not modified here.
 * @return true when a WIP marker, an LFS error file, or at least one LFS-like
 *         file is present; false otherwise (the skip is then logged).
 */
bool hasResumableStateBasedOnFiles(const std::string& downloadPath, const ResumeCandidates& candidates) {
    const auto lfsLikeFiles = ovms::libgit2::findLfsLikeFiles(downloadPath, true);
    const bool anyMarkerPresent = candidates.hasWipMarker || candidates.hasLfsErrorFile;

    // Any single signal (marker or leftover LFS-like file) is enough to request
    // the more detailed resume check.
    if (anyMarkerPresent || !lfsLikeFiles.empty()) {
        if (!lfsLikeFiles.empty()) {
            SPDLOG_DEBUG("Found {} LFS-like file(s) under path: {}. Enabling resume check.", lfsLikeFiles.size(), downloadPath);
        }
        return true;
    }

    // No sign of an interrupted pull: report that the download is skipped.
    SPDLOG_DEBUG("Model pull operation found no interruption markers for this path: {}", downloadPath);
    SPDLOG_INFO("Path already exists on local filesystem. Skipping download to path: {}", downloadPath);
    return false;
}

Status checkGitEntryExists(const std::string& downloadPath, bool& gitEntryExists) {
std::error_code ec;
const bool gitEntryExists = fs::exists(fs::path(downloadPath) / ".git", ec);
gitEntryExists = fs::exists(fs::path(downloadPath) / ".git", ec);
if (ec) {
// Probe itself failed (permission denied, I/O error, ...). Do not silently fall through
// to the "not a git repository" branch, that would mask real filesystem problems.
Expand All @@ -1294,6 +1342,42 @@ Status handleExistingRepositoryWithoutOverwrite(const std::string& downloadPath,
SPDLOG_INFO("Path \"{}\" exists but is not a git repository. "
"Skipping download and using existing files.",
downloadPath);
}
return StatusCode::OK;
}

/**
 * Runs the filesystem-only checks that determine whether a resume attempt is warranted.
 *
 * @param downloadPath Existing directory that is the target of the pull.
 * @param candidates [in/out] shouldResume is always reset to false first. When a
 *        .git entry exists, hasWipMarker and hasLfsErrorFile are filled in and
 *        shouldResume receives the result of hasResumableStateBasedOnFiles().
 * @return The failing status from checkGitEntryExists() if the probe fails;
 *         StatusCode::OK otherwise.
 */
Status checkSufficientResumeConditions(const std::string& downloadPath, ResumeCandidates& candidates) {
    candidates.shouldResume = false;

    bool hasGitEntry = false;
    if (auto gitProbeStatus = checkGitEntryExists(downloadPath, hasGitEntry); !gitProbeStatus.ok()) {
        return gitProbeStatus;
    }

    if (hasGitEntry) {
        // The markers are probed here exactly once and stored on the candidates
        // so later stages can reuse them instead of probing again.
        candidates.hasWipMarker = libgit2::hasLfsWipMarker(downloadPath);
        candidates.hasLfsErrorFile = libgit2::hasLfsErrorFile(downloadPath);
        candidates.shouldResume = hasResumableStateBasedOnFiles(downloadPath, candidates);
    }
    return StatusCode::OK;
}

Status handleExistingRepository(const std::string& downloadPath,
const std::function<Status(bool)>& checkRepositoryStatusFn) {
// If the directory does not contain a .git entry, treat it as a user-provided model directory.
// The user has copied model files in by hand; skip the pull and let model loading proceed
// against whatever files are already on disk. Use --overwrite_models to replace it with a
// fresh download.
ResumeCandidates candidates;
auto sufficientConditionsStatus = checkSufficientResumeConditions(downloadPath, candidates);
if (!sufficientConditionsStatus.ok()) {
return sufficientConditionsStatus;
}
if (!candidates.shouldResume) {
return StatusCode::OK;
}

Expand All @@ -1306,9 +1390,9 @@ Status handleExistingRepositoryWithoutOverwrite(const std::string& downloadPath,
return mapRepositoryOpenFailureToStatus(repoGuard);
}

auto candidates = buildResumeCandidates(repoGuard.get(), downloadPath);
buildResumeCandidates(repoGuard.get(), downloadPath, candidates);
if (!candidates.interruptionLikely) {
SPDLOG_DEBUG("Model pull operation found no interruption signals for this path: {}", downloadPath);
SPDLOG_WARN("Interruption marker(s) were found but no resumable candidates were detected for path: {}", downloadPath);
SPDLOG_INFO("Path already exists on local filesystem. Skipping download to path: {}", downloadPath);
return StatusCode::OK;
}
Expand Down Expand Up @@ -1466,7 +1550,7 @@ Status HfDownloader::downloadModel() {

// Repository exists and we do not want to overwrite
if (std::filesystem::is_directory(this->downloadPath) && !this->overwriteModels) {
return handleExistingRepositoryWithoutOverwrite(this->downloadPath, checkRepositoryStatusFn);
return handleExistingRepository(this->downloadPath, checkRepositoryStatusFn);
}

auto status = IModelDownloader::checkIfOverwriteAndRemove();
Expand Down
109 changes: 109 additions & 0 deletions src/test/libgit2_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include <gtest/gtest.h>

#include "src/pull_module/libgit2.hpp"
#include "src/utils/env_guard.hpp"

#include "environment.hpp"
#include "test_utils.hpp"
Expand Down Expand Up @@ -849,3 +850,111 @@ TEST(LibGit2LfsWipMarker, MarkersForDifferentRepositoriesAreIndependent) {
EXPECT_FALSE(ovms::libgit2::hasLfsWipMarker(repoAPath));
EXPECT_TRUE(ovms::libgit2::hasLfsWipMarker(repoBPath));
}

// ---------------------------------------------------------------------------
// Libgt2InitGuard initialization behavior
//
// These tests exercise the process-global libgit2 options set in
// Libgt2InitGuard's constructor: ownership-validation suppression and config
// search-path isolation. Each test creates its own guard so that the options
// are set fresh; as git_libgit2_init() is ref-counted by libgit2 it is safe to
// call it multiple times within the same process.
// ---------------------------------------------------------------------------

class Libgt2InitGuardTest : public ::testing::Test {
protected:
ovms::Libgit2Options defaultOpts;

static std::vector<std::string> getCurrentSearchPaths() {
std::vector<std::string> paths;
paths.reserve(4);

git_buf systemBuf = GIT_BUF_INIT;
EXPECT_EQ(git_libgit2_opts(GIT_OPT_GET_SEARCH_PATH, GIT_CONFIG_LEVEL_SYSTEM, &systemBuf), 0);
paths.push_back((systemBuf.ptr != nullptr) ? systemBuf.ptr : "");
git_buf_dispose(&systemBuf);

git_buf xdgBuf = GIT_BUF_INIT;
EXPECT_EQ(git_libgit2_opts(GIT_OPT_GET_SEARCH_PATH, GIT_CONFIG_LEVEL_XDG, &xdgBuf), 0);
paths.push_back((xdgBuf.ptr != nullptr) ? xdgBuf.ptr : "");
git_buf_dispose(&xdgBuf);

git_buf globalBuf = GIT_BUF_INIT;
EXPECT_EQ(git_libgit2_opts(GIT_OPT_GET_SEARCH_PATH, GIT_CONFIG_LEVEL_GLOBAL, &globalBuf), 0);
paths.push_back((globalBuf.ptr != nullptr) ? globalBuf.ptr : "");
git_buf_dispose(&globalBuf);

#if defined(_WIN32)
git_buf programdataBuf = GIT_BUF_INIT;
EXPECT_EQ(git_libgit2_opts(GIT_OPT_GET_SEARCH_PATH, GIT_CONFIG_LEVEL_PROGRAMDATA, &programdataBuf), 0);
paths.push_back((programdataBuf.ptr != nullptr) ? programdataBuf.ptr : "");
git_buf_dispose(&programdataBuf);
#endif

return paths;
}

void expectSearchPathBehavior(const std::optional<std::string>& envValue, bool shouldRemainEnabled) {
EnvGuard envGuard;
if (envValue.has_value()) {
envGuard.set("GIT_OPT_SET_ENABLE_SEARCH_PATHS", envValue.value());
} else {
envGuard.unset("GIT_OPT_SET_ENABLE_SEARCH_PATHS");
}

ASSERT_EQ(git_libgit2_init(), 1);
const auto baselinePaths = getCurrentSearchPaths();
git_libgit2_shutdown();

ovms::Libgt2InitGuard guard(defaultOpts);
ASSERT_GE(guard.status, 0);

const auto currentPaths = getCurrentSearchPaths();

if (shouldRemainEnabled) {
EXPECT_EQ(currentPaths, baselinePaths);
} else {
EXPECT_EQ(currentPaths.size(), baselinePaths.size());
EXPECT_EQ(currentPaths[0], "");
EXPECT_EQ(currentPaths[1], "");
EXPECT_EQ(currentPaths[2], "");
#if defined(_WIN32)
ASSERT_EQ(currentPaths.size(), 4u);
EXPECT_EQ(currentPaths[3], "");
#endif
}
}
};

// Constructing the guard with default options must report a non-negative
// status, leave the error message empty, and mark the guard as counted.
TEST_F(Libgt2InitGuardTest, ConstructionSucceeds) {
    ovms::Libgt2InitGuard initGuard(defaultOpts);

    EXPECT_GE(initGuard.status, 0);
    EXPECT_TRUE(initGuard.errMsg.empty());
    EXPECT_TRUE(initGuard.countedAsInitialized);
}

// Once the guard exists, libgit2 must report owner validation as disabled, so
// that repositories owned by a different OS user can still be opened.
TEST_F(Libgt2InitGuardTest, OwnerValidationIsDisabled) {
    ovms::Libgt2InitGuard initGuard(defaultOpts);
    ASSERT_GE(initGuard.status, 0);

    // Seeded with a non-zero value: the final check only passes if libgit2
    // actually wrote 0 into it.
    int reportedOwnerValidation = 1;
    EXPECT_EQ(git_libgit2_opts(GIT_OPT_GET_OWNER_VALIDATION, &reportedOwnerValidation), 0);
    EXPECT_EQ(reportedOwnerValidation, 0);
}

// With GIT_OPT_SET_ENABLE_SEARCH_PATHS=1 the guard must leave the host-level
// config search paths exactly as a plain libgit2 init reports them.
TEST_F(Libgt2InitGuardTest, ConfigSearchPathsRemainWhenEnvIsOne) {
    const bool searchPathsStayEnabled = true;
    expectSearchPathBehavior("1", searchPathsStayEnabled);
}

// With GIT_OPT_SET_ENABLE_SEARCH_PATHS=0 the guard must clear every host-level
// config search path so no host gitconfig is consulted.
TEST_F(Libgt2InitGuardTest, ConfigSearchPathsAreClearedWhenEnvIsZero) {
    const bool searchPathsStayEnabled = false;
    expectSearchPathBehavior("0", searchPathsStayEnabled);
}

// When GIT_OPT_SET_ENABLE_SEARCH_PATHS is not set at all, the guard must
// behave as in the "0" case and clear every host-level config search path.
TEST_F(Libgt2InitGuardTest, ConfigSearchPathsAreClearedWhenEnvIsUnset) {
    const bool searchPathsStayEnabled = false;
    expectSearchPathBehavior(std::nullopt, searchPathsStayEnabled);
}
Loading