diff --git a/src/BUILD b/src/BUILD index df50004d5b..bac3cf342d 100644 --- a/src/BUILD +++ b/src/BUILD @@ -2485,6 +2485,7 @@ cc_test( "test/kfs_rest_test.cpp", "test/kfs_rest_parser_test.cpp", "test/layout_test.cpp", + "test/libgit2_test.cpp", "test/metric_config_test.cpp", "test/metrics_test.cpp", "test/metrics_flow_test.cpp", diff --git a/src/pull_module/hf_pull_model_module.hpp b/src/pull_module/hf_pull_model_module.hpp index 2742ac23ca..a306b7bb7a 100644 --- a/src/pull_module/hf_pull_model_module.hpp +++ b/src/pull_module/hf_pull_model_module.hpp @@ -21,7 +21,7 @@ #include "../capi_frontend/server_settings.hpp" namespace ovms { - +struct Libgt2InitGuard; class HfPullModelModule : public Module { protected: HFSettingsImpl hfSettings; @@ -40,4 +40,6 @@ class HfPullModelModule : public Module { static const std::string GIT_SERVER_TIMEOUT_ENV; static const std::string GIT_SSL_CERT_LOCATIONS_ENV; }; + +std::variant> createGuard(); } // namespace ovms diff --git a/src/pull_module/libgit2.cpp b/src/pull_module/libgit2.cpp index 1566805c74..d2985ded17 100644 --- a/src/pull_module/libgit2.cpp +++ b/src/pull_module/libgit2.cpp @@ -15,9 +15,12 @@ //***************************************************************************** #include "libgit2.hpp" +#include #include -#include #include +#include +#include +#include #include #include @@ -45,6 +48,7 @@ #endif namespace ovms { +namespace fs = std::filesystem; // Callback for clone authentication - will be used when password is not set in repo_url // Does not work with LFS download as it requires additional authentication when password is not set in repository url @@ -68,16 +72,16 @@ int cred_acquire_cb(git_credential** out, password = _strdup(username); #endif } else { - fprintf(stderr, "HF_TOKEN env variable is not set.\n"); + fprintf(stderr, "[ERROR] HF_TOKEN env variable is not set.\n"); return -1; } error = git_credential_userpass_plaintext_new(out, username, password); if (error < 0) { - fprintf(stderr, "Creating 
credentials failed.\n"); + fprintf(stderr, "[ERROR] Creating credentials failed.\n"); error = -1; } } else { - fprintf(stderr, "Only USERPASS_PLAINTEXT supported in OVMS.\n"); + fprintf(stderr, "[ERROR] Only USERPASS_PLAINTEXT supported in OVMS.\n"); return 1; } @@ -179,6 +183,375 @@ Status HfDownloader::RemoveReadonlyFileAttributeFromDir(const std::string& direc return StatusCode::OK; } +class GitRepositoryGuard { +public: + git_repository* repo = nullptr; + int git_error_class = 0; + + GitRepositoryGuard(const std::string& path) { + int error = git_repository_open_ext(&repo, path.c_str(), 0, nullptr); + if (error < 0) { + const git_error* err = git_error_last(); + if (err) { + SPDLOG_ERROR("Repository open failed: {} {}", err->klass, err->message); + git_error_class = err->klass; + } else { + SPDLOG_ERROR("Repository open failed: {}", error); + } + if (repo) + git_repository_free(repo); + } + } + + ~GitRepositoryGuard() { + if (repo) { + git_repository_free(repo); + } + } + + // Allow implicit access to the raw pointer + git_repository* get() const { return repo; } + operator git_repository*() const { return repo; } + + // Non-copyable + GitRepositoryGuard(const GitRepositoryGuard&) = delete; + GitRepositoryGuard& operator=(const GitRepositoryGuard&) = delete; + + // Movable + GitRepositoryGuard(GitRepositoryGuard&& other) noexcept { + repo = other.repo; + other.repo = nullptr; + } + GitRepositoryGuard& operator=(GitRepositoryGuard&& other) noexcept { + if (this != &other) { + if (repo) + git_repository_free(repo); + repo = other.repo; + other.repo = nullptr; + } + return *this; + } +}; + +Status HfDownloader::CheckRepositoryStatus(bool checkUntracked) { + GitRepositoryGuard repoGuard(this->downloadPath); + if (!repoGuard.get()) { + if (repoGuard.git_error_class == 2) + return StatusCode::HF_GIT_STATUS_FAILED_TO_RESOLVE_PATH; + else if (repoGuard.git_error_class == 3) + return StatusCode::HF_GIT_LIGIT2_NOT_INITIALIZED; + else + return 
StatusCode::HF_GIT_STATUS_FAILED; + } + // HEAD state info + bool is_detached = git_repository_head_detached(repoGuard.get()) == 1; + bool is_unborn = git_repository_head_unborn(repoGuard.get()) == 1; + + // Collect status (staged/unstaged/untracked) + git_status_options opts = GIT_STATUS_OPTIONS_INIT; + + opts.show = GIT_STATUS_SHOW_INDEX_AND_WORKDIR; + opts.flags = GIT_STATUS_OPT_INCLUDE_UNTRACKED // include untracked files // | GIT_STATUS_OPT_RENAMES_HEAD_TO_INDEX // detect renames HEAD->index - not required currently and impacts performance + | GIT_STATUS_OPT_SORT_CASE_SENSITIVELY; + + git_status_list* status_list = nullptr; + int error = git_status_list_new(&status_list, repoGuard.get(), &opts); + if (error != 0) { + return StatusCode::HF_GIT_STATUS_FAILED; + } + + size_t staged = 0, unstaged = 0, untracked = 0, conflicted = 0; + const size_t n = git_status_list_entrycount(status_list); // iterate entries + for (size_t i = 0; i < n; ++i) { + const git_status_entry* e = git_status_byindex(status_list, i); + unsigned s = e->status; + + // Staged (index) changes + if (s & (GIT_STATUS_INDEX_NEW | + GIT_STATUS_INDEX_MODIFIED | + GIT_STATUS_INDEX_DELETED | + GIT_STATUS_INDEX_RENAMED | + GIT_STATUS_INDEX_TYPECHANGE)) + ++staged; + + // Unstaged (workdir) changes + if (s & (GIT_STATUS_WT_MODIFIED | + GIT_STATUS_WT_DELETED | + GIT_STATUS_WT_RENAMED | + GIT_STATUS_WT_TYPECHANGE)) + ++unstaged; + + // Untracked + if (s & GIT_STATUS_WT_NEW) + ++untracked; + + // libgit2 will also flag conflicted entries via status/diff machinery + if (s & GIT_STATUS_CONFLICTED) + ++conflicted; + } + + std::stringstream ss; + ss << "HEAD state : " + << (is_unborn ? "unborn (no commits)" : (is_detached ? 
"detached" : "attached")) + << "\n"; + ss << "Staged changes : " << staged << "\n"; + ss << "Unstaged changes: " << unstaged << "\n"; + ss << "Untracked files : " << untracked << "\n"; + if (conflicted) + ss << " (" << conflicted << " paths flagged)"; + + SPDLOG_DEBUG(ss.str()); + git_status_list_free(status_list); + + // We do not care about untracked until after git clone + if (is_unborn || is_detached || staged || unstaged || conflicted || (checkUntracked && untracked)) { + return StatusCode::HF_GIT_STATUS_UNCLEAN; + } + return StatusCode::OK; +} + +#define CHECK(call) \ + do { \ + int _err = (call); \ + if (_err < 0) { \ + const git_error* e = git_error_last(); \ + fprintf(stderr, "[ERROR] %d: %s (%s:%d)\n", _err, e && e->message ? e->message : "no message", __FILE__, __LINE__); \ + return; \ + } \ + } while (0) + +// Trim trailing '\r' (for CRLF files) and surrounding spaces +void rtrimCrLfWhitespace(std::string& s) { + if (!s.empty() && s.back() == '\r') + s.pop_back(); // remove trailing '\r' + while (!s.empty() && std::isspace(static_cast<unsigned char>(s.back()))) + s.pop_back(); // trailing ws + size_t i = 0; + while (i < s.size() && std::isspace(static_cast<unsigned char>(s[i]))) + ++i; // leading ws + if (i > 0) + s.erase(0, i); +} + +// Case-insensitive substring search: returns true if 'needle' is found in 'hay' +bool containsCaseInsensitive(const std::string& hay, const std::string& needle) { + auto toLower = [](std::string v) { + std::transform(v.begin(), v.end(), v.begin(), + [](unsigned char c) { return static_cast<char>(std::tolower(c)); }); + return v; + }; + std::string hayLower = toLower(hay); + std::string needleLower = toLower(needle); + return hayLower.find(needleLower) != std::string::npos; +} + +// Read at most the first 3 lines of a file, with a per-line cap to avoid huge reads. +// Returns true if successful (even if <3 lines exist; vector will just be shorter).
+ +bool readFirstThreeLines(const std::filesystem::path& p, std::vector<std::string>& out) { + out.clear(); + + std::ifstream in(p, std::ios::binary); + if (!in) + return false; + + std::string line; + line.reserve(256); // small optimization + int c; + + while (out.size() < 3 && (c = in.get()) != EOF) { + if (c == '\r') { + // Handle CR or CRLF as one line ending + int next = in.peek(); + if (next == '\n') { + in.get(); // consume '\n' + } + // finalize current line + rtrimCrLfWhitespace(line); + out.push_back(std::move(line)); + line.clear(); + } else if (c == '\n') { + // LF line ending + rtrimCrLfWhitespace(line); + out.push_back(std::move(line)); + line.clear(); + } else { + line.push_back(static_cast<char>(c)); + } + } + + // Handle the last line if file did not end with EOL + if (!line.empty() && out.size() < 3) { + rtrimCrLfWhitespace(line); + out.push_back(std::move(line)); + } + + return true; +} + +// Check if the first 3 lines contain required keywords in positional order: +// line1 -> "version", line2 -> "oid", line3 -> "size" (case-insensitive). +bool fileHasLfsKeywordsFirst3Positional(const fs::path& p) { + std::error_code ec; + if (!fs::is_regular_file(p, ec)) + return false; + + std::vector<std::string> lines; + if (!readFirstThreeLines(p, lines)) + return false; + + if (lines.size() < 3) + return false; + + return containsCaseInsensitive(lines[0], "version") && + containsCaseInsensitive(lines[1], "oid") && + containsCaseInsensitive(lines[2], "size"); +} + +// Helper: make path relative to base (best-effort, non-throwing).
+fs::path makeRelativeToBase(const fs::path& path, const fs::path& base) { + std::error_code ec; + // Try fs::relative first (handles canonical comparisons, may fail if on different roots) + fs::path rel = fs::relative(path, base, ec); + if (!ec && !rel.empty()) + return rel; + + // Fallback: purely lexical relative (doesn't access filesystem) + rel = path.lexically_relative(base); + if (!rel.empty()) + return rel; + + // Last resort: return filename only (better than absolute when nothing else works) + if (path.has_filename()) + return path.filename(); + return path; +} + +// Find all files under 'directory' that satisfy the first-3-lines LFS keyword check. Default: bool recursive = true +std::vector<fs::path> findLfsLikeFiles(const std::string& directory, bool recursive) { + std::vector<fs::path> matches; + std::error_code ec; + + if (!fs::exists(directory, ec) || !fs::is_directory(directory, ec)) { + return matches; + } + + if (recursive) { + for (fs::recursive_directory_iterator it(directory, ec), end; !ec && it != end; ++it) { + const auto& p = it->path(); + if (fileHasLfsKeywordsFirst3Positional(p)) { + matches.push_back(makeRelativeToBase(p, directory)); + } + } + } else { + for (fs::directory_iterator it(directory, ec), end; !ec && it != end; ++it) { + const auto& p = it->path(); + if (fileHasLfsKeywordsFirst3Positional(p)) { + matches.push_back(makeRelativeToBase(p, directory)); + } + } + } + return matches; +} + +// pick the right entry pointer type for your libgit2 +#if defined(GIT_LIBGIT2_VER_MAJOR) +// libgit2 ≥ 1.0 generally has const-correct free() (accepts const*) +using git_tree_entry_ptr = const git_tree_entry*; +#else +using git_tree_entry_ptr = git_tree_entry*; +#endif + +// Single guard that owns all temporaries used in resumeLfsDownloadForFile +struct GitScope { + git_object* tree_obj = nullptr; // owns the tree as a generic git_object + git_tree_entry_ptr entry = nullptr; // owns the entry + git_blob* blob = nullptr; // owns the blob + git_buf out =
GIT_BUF_INIT; // owns the buffer + + GitScope() = default; + ~GitScope() { cleanup(); } + + GitScope(const GitScope&) = delete; + GitScope& operator=(const GitScope&) = delete; + + GitScope(GitScope&& other) noexcept : + tree_obj(other.tree_obj), + entry(other.entry), + blob(other.blob), + out(other.out) { + other.tree_obj = nullptr; + other.entry = nullptr; + other.blob = nullptr; + other.out = GIT_BUF_INIT; + } + GitScope& operator=(GitScope&& other) noexcept { + if (this != &other) { + cleanup(); + tree_obj = other.tree_obj; + entry = other.entry; + blob = other.blob; + out = other.out; + other.tree_obj = nullptr; + other.entry = nullptr; + other.blob = nullptr; + other.out = GIT_BUF_INIT; + } + return *this; + } + + git_tree* tree() const { return reinterpret_cast<git_tree*>(tree_obj); } + +private: + void cleanup() noexcept { + git_buf_dispose(&out); + if (blob) { + git_blob_free(blob); + blob = nullptr; + } + if (entry) { + git_tree_entry_free(entry); + entry = nullptr; + } + if (tree_obj) { + git_object_free(tree_obj); + tree_obj = nullptr; + } + } +}; + +void resumeLfsDownloadForFile(git_repository* repo, const char* filePathInRepo) { + GitScope g; + + // Resolve HEAD tree (origin/main^{tree}) + CHECK(git_revparse_single(&g.tree_obj, repo, "origin/main^{tree}")); + + // Find the tree entry by path + CHECK(git_tree_entry_bypath(&g.entry, g.tree(), filePathInRepo)); + + // Ensure it's a blob + if (git_tree_entry_type(g.entry) != GIT_OBJECT_BLOB) { + fprintf(stderr, "[ERROR] Path is not a blob: %s\n", filePathInRepo); + return; // Guard cleans up + } + + // Lookup the blob + CHECK(git_blob_lookup(&g.blob, repo, git_tree_entry_id(g.entry))); + + // Configure filter behavior + git_blob_filter_options opts = GIT_BLOB_FILTER_OPTIONS_INIT; + // Choose direction: + // GIT_BLOB_FILTER_TO_WORKTREE : apply smudge (as if writing to working tree) + // GIT_BLOB_FILTER_TO_ODB : apply clean (as if writing to ODB) + // opts.flags = GIT_BLOB_FILTER_TO_WORKTREE; + + // Apply filters
based on .gitattributes for this path (triggers LFS smudge/clean) + CHECK(git_blob_filter(&g.out, g.blob, filePathInRepo, &opts)); + + // We don't need the buffer contents; the filter side-effects are enough. + // All resources (out, blob, entry, tree_obj) will be freed automatically here. +} + Status HfDownloader::downloadModel() { if (FileSystem::isPathEscaped(this->downloadPath)) { SPDLOG_ERROR("Path {} escape with .. is forbidden.", this->downloadPath); @@ -187,8 +560,53 @@ Status HfDownloader::downloadModel() { // Repository exists and we do not want to overwrite if (std::filesystem::is_directory(this->downloadPath) && !this->overwriteModels) { - std::cout << "Path already exists on local filesystem. Skipping download to path: " << this->downloadPath << std::endl; - return StatusCode::OK; + // Checking if the download was partially finished for any files in repository + auto matches = findLfsLikeFiles(this->downloadPath, true); + + if (matches.empty()) { + std::cout << "No files to resume download found.\n"; + std::cout << "Path already exists on local filesystem. Skipping download to path: " << this->downloadPath << std::endl; + return StatusCode::OK; + } else { + std::cout << "Found " << matches.size() << " file(s) to resume partial download:\n"; + for (const auto& p : matches) { + std::cout << " " << p.string() << "\n"; + } + } + + GitRepositoryGuard repoGuard(this->downloadPath); + if (!repoGuard.get()) { + std::cout << "Path already exists on local filesystem. 
And is not a git repository: " << this->downloadPath << std::endl; + if (repoGuard.git_error_class == 2) + return StatusCode::HF_GIT_STATUS_FAILED_TO_RESOLVE_PATH; + else if (repoGuard.git_error_class == 3) + return StatusCode::HF_GIT_LIGIT2_NOT_INITIALIZED; + else + return StatusCode::HF_GIT_STATUS_FAILED; + } + + // Set repository url + std::string passRepoUrl = GetRepositoryUrlWithPassword(); + const char* url = passRepoUrl.c_str(); + int error = git_repository_set_url(repoGuard.get(), url); + if (error < 0) { + const git_error* err = git_error_last(); + if (err) + SPDLOG_ERROR("Repository set url failed: {} {}", err->klass, err->message); + else + SPDLOG_ERROR("Repository set url failed: {}", error); + std::cout << "Path already exists on local filesystem. And set git repository url failed: " << this->downloadPath << std::endl; + return StatusCode::HF_GIT_CLONE_FAILED; + } + + for (const auto& p : matches) { + std::cout << " Resuming " << p.string() << "...\n"; + std::string path = p.string(); + resumeLfsDownloadForFile(repoGuard.get(), path.c_str()); + } + + SPDLOG_DEBUG("Checking repository status."); + return CheckRepositoryStatus(false); } auto status = IModelDownloader::checkIfOverwriteAndRemove(); @@ -197,6 +615,7 @@ Status HfDownloader::downloadModel() { } SPDLOG_DEBUG("Downloading to path: {}", this->downloadPath); + git_repository* cloned_repo = NULL; // clone_opts for progress reporting set in libgit2 lib by patch git_clone_options clone_opts = GIT_CLONE_OPTIONS_INIT; @@ -225,12 +644,17 @@ Status HfDownloader::downloadModel() { SPDLOG_ERROR("Libgit2 clone error: {} message: {}", err->klass, err->message); else SPDLOG_ERROR("Libgit2 clone error: {}", error); - return StatusCode::HF_GIT_CLONE_FAILED; } else if (cloned_repo) { git_repository_free(cloned_repo); } + SPDLOG_DEBUG("Checking repository status."); + status = CheckRepositoryStatus(true); + if (!status.ok()) { + return status; + } + // libgit2 clone sets readonly attributes status = 
RemoveReadonlyFileAttributeFromDir(this->downloadPath); if (!status.ok()) { diff --git a/src/pull_module/libgit2.hpp b/src/pull_module/libgit2.hpp index 943f3cf725..f2da9f875f 100644 --- a/src/pull_module/libgit2.hpp +++ b/src/pull_module/libgit2.hpp @@ -15,8 +15,10 @@ // limitations under the License. //***************************************************************************** #pragma once -#include +#include #include +#include +#include #include #include @@ -31,6 +33,7 @@ namespace ovms { class Status; +namespace fs = std::filesystem; /* * libgit2 options. 0 is the default value @@ -62,5 +65,13 @@ class HfDownloader : public IModelDownloader { std::string GetRepositoryUrlWithPassword(); bool CheckIfProxySet(); Status RemoveReadonlyFileAttributeFromDir(const std::string& directoryPath); + Status CheckRepositoryStatus(bool checkUntracked); }; + +void rtrimCrLfWhitespace(std::string& s); +bool containsCaseInsensitive(const std::string& hay, const std::string& needle); +bool readFirstThreeLines(const fs::path& p, std::vector<std::string>& outLines); +bool fileHasLfsKeywordsFirst3Positional(const fs::path& p); +fs::path makeRelativeToBase(const fs::path& path, const fs::path& base); +std::vector<fs::path> findLfsLikeFiles(const std::string& directory, bool recursive = true); } // namespace ovms diff --git a/src/status.cpp b/src/status.cpp index 97a92b9d30..0cc057042b 100644 --- a/src/status.cpp +++ b/src/status.cpp @@ -348,6 +348,10 @@ const std::unordered_map Status::statusMessageMap = { {StatusCode::HF_RUN_OPTIMUM_CLI_EXPORT_FAILED, "Failed to run optimum-cli export command"}, {StatusCode::HF_RUN_CONVERT_TOKENIZER_EXPORT_FAILED, "Failed to run convert-tokenizer export command"}, {StatusCode::HF_GIT_CLONE_FAILED, "Failed in libgit2 execution of clone method"}, + {StatusCode::HF_GIT_STATUS_FAILED, "Failed in libgit2 execution of status method"}, + {StatusCode::HF_GIT_STATUS_FAILED_TO_RESOLVE_PATH, "Failed in libgit2 to check repository status for a given path"}, +
{StatusCode::HF_GIT_LIGIT2_NOT_INITIALIZED, "Libgit2 was not initialized"}, + {StatusCode::HF_GIT_STATUS_UNCLEAN, "Unclean status detected in libgit2 repository path"}, {StatusCode::PARTIAL_END, "Request has finished and no further communication is needed"}, {StatusCode::NONEXISTENT_PATH, "Nonexistent path"}, diff --git a/src/status.hpp b/src/status.hpp index 18a2b093b5..02b42886a5 100644 --- a/src/status.hpp +++ b/src/status.hpp @@ -360,6 +360,10 @@ enum class StatusCode { HF_RUN_OPTIMUM_CLI_EXPORT_FAILED, HF_RUN_CONVERT_TOKENIZER_EXPORT_FAILED, HF_GIT_CLONE_FAILED, + HF_GIT_STATUS_FAILED, + HF_GIT_STATUS_FAILED_TO_RESOLVE_PATH, + HF_GIT_LIGIT2_NOT_INITIALIZED, + HF_GIT_STATUS_UNCLEAN, PARTIAL_END, NONEXISTENT_PATH, diff --git a/src/test/libgit2_test.cpp b/src/test/libgit2_test.cpp new file mode 100644 index 0000000000..3774d5f034 --- /dev/null +++ b/src/test/libgit2_test.cpp @@ -0,0 +1,772 @@ +//***************************************************************************** +// Copyright 2026 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//***************************************************************************** +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "src/pull_module/libgit2.hpp" + +#include "environment.hpp" + +namespace fs = std::filesystem; + +TEST(LibGit2RtrimCrLfWhitespace, EmptyString) { + std::string s; + ovms::rtrimCrLfWhitespace(s); + EXPECT_TRUE(s.empty()); +} + +TEST(LibGit2RtrimCrLfWhitespace, NoWhitespace) { + std::string s = "abc"; + ovms::rtrimCrLfWhitespace(s); + EXPECT_EQ(s, "abc"); +} + +TEST(LibGit2RtrimCrLfWhitespace, OnlySpaces) { + std::string s = " "; + ovms::rtrimCrLfWhitespace(s); + EXPECT_EQ(s, ""); +} + +TEST(LibGit2RtrimCrLfWhitespace, LeadingSpacesOnly) { + std::string s = " abc"; + ovms::rtrimCrLfWhitespace(s); + EXPECT_EQ(s, "abc"); +} + +TEST(LibGit2RtrimCrLfWhitespace, TrailingSpacesOnly) { + std::string s = "abc "; + ovms::rtrimCrLfWhitespace(s); + EXPECT_EQ(s, "abc"); +} + +TEST(LibGit2RtrimCrLfWhitespace, LeadingAndTrailingSpaces) { + std::string s = " abc "; + ovms::rtrimCrLfWhitespace(s); + EXPECT_EQ(s, "abc"); +} + +TEST(LibGit2RtrimCrLfWhitespace, TabsAndNewlinesAround) { + std::string s = "\t\n abc \n\t"; + ovms::rtrimCrLfWhitespace(s); + EXPECT_EQ(s, "abc"); +} + +TEST(LibGit2RtrimCrLfWhitespace, AllCWhitespaceAround) { + // Include space, tab, newline, vertical tab, form feed, carriage return + std::string s = " \t\n\v\f\rabc\r\f\v\n\t "; + ovms::rtrimCrLfWhitespace(s); + EXPECT_EQ(s, "abc"); +} + +TEST(LibGit2RtrimCrLfWhitespace, PreserveInternalSpaces) { + std::string s = " a b c "; + ovms::rtrimCrLfWhitespace(s); + EXPECT_EQ(s, "a b c"); +} + +TEST(LibGit2RtrimCrLfWhitespace, TrailingCRLF) { + // Windows-style line ending: "\r\n" + std::string s = "abc\r\n"; + ovms::rtrimCrLfWhitespace(s); + EXPECT_EQ(s, "abc"); +} + +TEST(LibGit2RtrimCrLfWhitespace, TrailingCROnly) { + std::string s = "abc\r"; + ovms::rtrimCrLfWhitespace(s); + EXPECT_EQ(s, "abc"); +} + +TEST(LibGit2RtrimCrLfWhitespace, 
TrailingLFOnly) { + std::string s = "abc\n"; + ovms::rtrimCrLfWhitespace(s); + EXPECT_EQ(s, "abc"); +} + +TEST(LibGit2RtrimCrLfWhitespace, MultipleTrailingCRs) { + // Only one trailing '\r' is specially removed first, but then trailing + // whitespace loop will remove any remaining CRs (since isspace('\r') == true). + std::string s = "abc\r\r\r"; + ovms::rtrimCrLfWhitespace(s); + EXPECT_EQ(s, "abc"); +} + +TEST(LibGit2RtrimCrLfWhitespace, LeadingCRLFAndSpaces) { + std::string s = "\r\n abc"; + ovms::rtrimCrLfWhitespace(s); + EXPECT_EQ(s, "abc"); +} + +TEST(LibGit2RtrimCrLfWhitespace, InternalCRLFShouldRemainIfNotLeadingOrTrailing) { + // Internal whitespace should be preserved + std::string s = "a\r\nb"; + ovms::rtrimCrLfWhitespace(s); + EXPECT_EQ(s, "a\r\nb"); +} + +TEST(LibGit2RtrimCrLfWhitespace, OnlyCRLFAndWhitespace) { + std::string s = "\r\n\t \r"; + ovms::rtrimCrLfWhitespace(s); + EXPECT_EQ(s, ""); +} + +TEST(LibGit2RtrimCrLfWhitespace, NonAsciiBytesAreNotTrimmedByIsspace) { + // 0xC2 0xA0 is UTF-8 for NO-BREAK SPACE; bytes individually are not ASCII spaces. + // isspace() on unsigned char typically returns false for these bytes in the "C" locale. + // So they should remain unless at edges and recognized by the current locale (usually not). 
+ std::string s = "\xC2" + "\xA0" + "abc" + "\xC2" + "\xA0"; + ovms::rtrimCrLfWhitespace(s); + // Expect unchanged because these bytes are not recognized by std::isspace in C locale + EXPECT_EQ(s, "\xC2" + "\xA0" + "abc" + "\xC2" + "\xA0"); +} + +TEST(LibGit2RtrimCrLfWhitespace, Idempotent) { + std::string s = " abc \n"; + ovms::rtrimCrLfWhitespace(s); + auto once = s; + ovms::rtrimCrLfWhitespace(s); + EXPECT_EQ(s, once); +} + +TEST(LibGit2ContainsCaseInsensitiveTest, ExactMatch) { + EXPECT_TRUE(ovms::containsCaseInsensitive("hello", "hello")); +} + +TEST(LibGit2ContainsCaseInsensitiveTest, MixedCaseMatch) { + EXPECT_TRUE(ovms::containsCaseInsensitive("HeLLo WoRLD", "world")); + EXPECT_TRUE(ovms::containsCaseInsensitive("HeLLo WoRLD", "HELLO")); +} + +TEST(LibGit2ContainsCaseInsensitiveTest, NoMatch) { + EXPECT_FALSE(ovms::containsCaseInsensitive("abcdef", "gh")); +} + +TEST(LibGit2ContainsCaseInsensitiveTest, EmptyNeedleReturnsTrue) { + // Consistent with std::string::find("") → 0 + EXPECT_TRUE(ovms::containsCaseInsensitive("something", "")); +} + +TEST(LibGit2ContainsCaseInsensitiveTest, EmptyHaystackNonEmptyNeedleReturnsFalse) { + EXPECT_FALSE(ovms::containsCaseInsensitive("", "abc")); +} + +TEST(LibGit2ContainsCaseInsensitiveTest, BothEmptyReturnsTrue) { + EXPECT_TRUE(ovms::containsCaseInsensitive("", "")); +} + +TEST(LibGit2ContainsCaseInsensitiveTest, SubstringAtBeginning) { + EXPECT_TRUE(ovms::containsCaseInsensitive("HelloWorld", "hello")); +} + +TEST(LibGit2ContainsCaseInsensitiveTest, SubstringInMiddle) { + EXPECT_TRUE(ovms::containsCaseInsensitive("abcHELLOxyz", "hello")); +} + +TEST(LibGit2ContainsCaseInsensitiveTest, SubstringAtEnd) { + EXPECT_TRUE(ovms::containsCaseInsensitive("testCASE", "case")); +} + +TEST(LibGit2ContainsCaseInsensitiveTest, NoFalsePositives) { + EXPECT_FALSE(ovms::containsCaseInsensitive("aaaaa", "b")); +} + +TEST(LibGit2ContainsCaseInsensitiveTest, UnicodeCharactersSafeButNotSpecialHandled) { + // std::tolower only reliably 
handles unsigned char range. + // This ensures your implementation does not crash or behave strangely. + EXPECT_FALSE(ovms::containsCaseInsensitive("ĄĆĘŁ", "ę")); // depends on locale; ASCII-only expected false +} + +// A helper for writing test files. +static fs::path writeTempFile(const std::string& filename, + const std::string& content) { + fs::path p = fs::temp_directory_path() / filename; + std::ofstream out(p, std::ios::binary); + out << content; + return p; +} + +TEST(LibGit2ReadFirstThreeLinesTest, FileNotFoundReturnsFalse) { + std::vector lines; + fs::path p = fs::temp_directory_path() / "nonexistent_12345.txt"; + EXPECT_FALSE(ovms::readFirstThreeLines(p, lines)); + EXPECT_TRUE(lines.empty()); +} + +TEST(LibGit2ReadFirstThreeLinesTest, ReadsExactlyThreeLines) { + fs::path p = writeTempFile("three_lines.txt", + "line1\n" + "line2\n" + "line3\n" + "extra\n"); // should be ignored + + std::vector out; + EXPECT_TRUE(ovms::readFirstThreeLines(p, out)); + ASSERT_EQ(out.size(), 3u); + EXPECT_EQ(out[0], "line1"); + EXPECT_EQ(out[1], "line2"); + EXPECT_EQ(out[2], "line3"); +} + +TEST(LibGit2ReadFirstThreeLinesTest, ReadsFewerThanThreeLines) { + fs::path p = writeTempFile("two_lines.txt", + "alpha\n" + "beta\n"); + + std::vector out; + EXPECT_TRUE(ovms::readFirstThreeLines(p, out)); + ASSERT_EQ(out.size(), 2u); + EXPECT_EQ(out[0], "alpha"); + EXPECT_EQ(out[1], "beta"); +} + +TEST(LibGit2ReadFirstThreeLinesTest, ReadsOneLineOnly) { + fs::path p = writeTempFile("one_line.txt", "solo\n"); + + std::vector out; + EXPECT_TRUE(ovms::readFirstThreeLines(p, out)); + ASSERT_EQ(out.size(), 1u); + EXPECT_EQ(out[0], "solo"); +} + +TEST(LibGit2ReadFirstThreeLinesTest, EmptyFileProducesZeroLinesAndReturnsTrue) { + fs::path p = writeTempFile("empty.txt", ""); + + std::vector out; + EXPECT_TRUE(ovms::readFirstThreeLines(p, out)); + EXPECT_TRUE(out.empty()); +} + +TEST(LibGit2ReadFirstThreeLinesTest, CRLFIsTrimmedCorrectly) { + fs::path p = writeTempFile("crlf.txt", + "hello\r\n" + 
"world\r\n"); + + std::vector out; + EXPECT_TRUE(ovms::readFirstThreeLines(p, out)); + ASSERT_EQ(out.size(), 2u); + EXPECT_EQ(out[0], "hello"); + EXPECT_EQ(out[1], "world"); +} + +TEST(LibGit2ReadFirstThreeLinesTest, LoneCRAndLFAreTrimmed) { + fs::path p = writeTempFile("mixed_newlines.txt", + "a\r" + "b\n" + "c\r\n"); + + std::vector out; + EXPECT_TRUE(ovms::readFirstThreeLines(p, out)); + + ASSERT_EQ(out.size(), 3u); + EXPECT_EQ(out[0], "a"); + EXPECT_EQ(out[1], "b"); + EXPECT_EQ(out[2], "c"); +} + +TEST(LibGit2ReadFirstThreeLinesTest, HandlesEOFWithoutNewlineAtEnd) { + fs::path p = writeTempFile("eof_no_newline.txt", + "first\n" + "second\n" + "third_without_newline"); + + std::vector out; + EXPECT_TRUE(ovms::readFirstThreeLines(p, out)); + + ASSERT_EQ(out.size(), 3u); + EXPECT_EQ(out[0], "first"); + EXPECT_EQ(out[1], "second"); + EXPECT_EQ(out[2], "third_without_newline"); +} + +TEST(LibGit2ReadFirstThreeLinesTest, TrailingWhitespaceNotPreserved) { + fs::path p = writeTempFile("spaces.txt", + "abc \n" + "def\t\t\n"); + + std::vector out; + EXPECT_TRUE(ovms::readFirstThreeLines(p, out)); + + ASSERT_EQ(out.size(), 2u); + EXPECT_EQ(out[0], "abc"); // spaces preserved + EXPECT_EQ(out[1], "def"); // tabs preserved +} + +// Optional: If you need to call readFirstThreeLines in any test-specific checks, +// declare it too (remove if unused here). +// bool readFirstThreeLines(const fs::path& p, std::vector& out); + +// ---- Test Utilities ---- + +// Create a unique temporary directory inside the system temp directory. 
+static fs::path createTempDir() { + const fs::path base = fs::temp_directory_path(); + std::random_device rd; + std::mt19937_64 gen(rd()); + std::uniform_int_distribution<uint64_t> dist; + + // Try a reasonable number of times to avoid rare collisions + for (int attempt = 0; attempt < 100; ++attempt) { + auto candidate = base / ("lfs_kw_tests_" + std::to_string(dist(gen))); + std::error_code ec; + if (fs::create_directory(candidate, ec)) { + return candidate; + } + // If creation failed due to existing path, loop and try another name + // Otherwise (e.g., permissions), fall through and try again up to limit + } + + throw std::runtime_error("Failed to create a unique temporary directory"); +} + +static fs::path writeFile(const fs::path& dir, const std::string& name, const std::string& content) { + fs::path p = dir / name; + std::ofstream out(p, std::ios::binary); + if (!out) + throw std::runtime_error("Failed to create file: " + p.string()); + out.write(content.data(), static_cast<std::streamsize>(content.size())); + return p; +} + +// A simple RAII for a temp directory +struct TempDir { + fs::path dir; + TempDir() : + dir(createTempDir()) { + if (dir.empty()) + throw std::runtime_error("Failed to create temp directory"); + } + ~TempDir() { + std::error_code ec; + fs::remove_all(dir, ec); + } +}; + +class LibGit2FileHasLfsKeywordsFirst3PositionalTest : public ::testing::Test { +protected: + TempDir td; +}; + +// ---- Tests ---- + +TEST_F(LibGit2FileHasLfsKeywordsFirst3PositionalTest, ReturnsFalseForNonExistingFile) { + fs::path p = td.dir / "does_not_exist.txt"; + EXPECT_FALSE(ovms::fileHasLfsKeywordsFirst3Positional(p)); +} + +TEST_F(LibGit2FileHasLfsKeywordsFirst3PositionalTest, ReturnsFalseForDirectoryPath) { + // Passing the directory itself (not a regular file) + EXPECT_FALSE(ovms::fileHasLfsKeywordsFirst3Positional(td.dir)); +} + +TEST_F(LibGit2FileHasLfsKeywordsFirst3PositionalTest, ReturnsFalseForEmptyFile) { + auto p = writeFile(td.dir, "empty.txt", "");
EXPECT_FALSE(ovms::fileHasLfsKeywordsFirst3Positional(p)); +} + +TEST_F(LibGit2FileHasLfsKeywordsFirst3PositionalTest, ReturnsFalseForLessThanThreeLines) { + { + auto p = writeFile(td.dir, "one_line.txt", "version something\n"); + EXPECT_FALSE(ovms::fileHasLfsKeywordsFirst3Positional(p)); + } + { + auto p = writeFile(td.dir, "two_lines.txt", "version x\n" + "oid y\n"); + EXPECT_FALSE(ovms::fileHasLfsKeywordsFirst3Positional(p)); + } +} + +TEST_F(LibGit2FileHasLfsKeywordsFirst3PositionalTest, HappyPathCaseInsensitiveAndExtraContent) { + // Lines contain the keywords somewhere (case-insensitive), extra content is okay. + const std::string content = + " VeRsIoN https://git-lfs.github.com/spec/v1 \n" + "\toid Sha256:abcdef1234567890\n" + "size 999999 \t \n"; + auto p = writeFile(td.dir, "ok.txt", content); + EXPECT_TRUE(ovms::fileHasLfsKeywordsFirst3Positional(p)); +} + +TEST_F(LibGit2FileHasLfsKeywordsFirst3PositionalTest, WrongOrderShouldFail) { + // Put keywords in wrong lines + const std::string content = + "size 100\n" + "version something\n" + "oid abc\n"; + auto p = writeFile(td.dir, "wrong_order.txt", content); + EXPECT_FALSE(ovms::fileHasLfsKeywordsFirst3Positional(p)); +} + +TEST_F(LibGit2FileHasLfsKeywordsFirst3PositionalTest, MissingKeywordShouldFail) { + // Line1 has version, line2 missing oid, line3 has size + const std::string content = + "version v1\n" + "hash sha256:abc\n" + "size 42\n"; + auto p = writeFile(td.dir, "missing_keyword.txt", content); + EXPECT_FALSE(ovms::fileHasLfsKeywordsFirst3Positional(p)); +} + +TEST_F(LibGit2FileHasLfsKeywordsFirst3PositionalTest, MixedNewlines_CR_LF_CRLF_ShouldPass) { + // Requires readFirstThreeLines to treat \r, \n, and \r\n as line breaks. 
+ const std::string content = + "version one\r" + "oid two\n" + "size three\r\n"; + auto p = writeFile(td.dir, "mixed_newlines.txt", content); + EXPECT_TRUE(ovms::fileHasLfsKeywordsFirst3Positional(p)); +} + +TEST_F(LibGit2FileHasLfsKeywordsFirst3PositionalTest, LeadingAndTrailingWhitespaceDoesNotBreak) { + // Assuming readFirstThreeLines trims edge whitespace; otherwise contains() still works + const std::string content = + " version \n" + "\t oid\t\n" + " size \t\n"; + auto p = writeFile(td.dir, "whitespace.txt", content); + EXPECT_TRUE(ovms::fileHasLfsKeywordsFirst3Positional(p)); +} + +TEST_F(LibGit2FileHasLfsKeywordsFirst3PositionalTest, KeywordsMayAppearWithinLongerTextOnEachLine) { + const std::string content = + "prefix-version-suffix\n" + "some_oid_here\n" + "the_size_is_here\n"; + auto p = writeFile(td.dir, "contains_substrings.txt", content); + EXPECT_TRUE(ovms::fileHasLfsKeywordsFirst3Positional(p)); +} + +TEST_F(LibGit2FileHasLfsKeywordsFirst3PositionalTest, CaseInsensitiveCheck) { + const std::string content = + "VerSiOn 1\n" + "OID something\n" + "SiZe 123\n"; + auto p = writeFile(td.dir, "case_insensitive.txt", content); + EXPECT_TRUE(ovms::fileHasLfsKeywordsFirst3Positional(p)); +} + +TEST_F(LibGit2FileHasLfsKeywordsFirst3PositionalTest, ExtraLinesAfterFirstThreeDoNotMatter) { + const std::string content = + "version v1\n" + "oid abc\n" + "size 42\n" + "EXTRA LINE THAT SHOULD NOT AFFECT RESULT\n"; + auto p = writeFile(td.dir, "extra_lines.txt", content); + EXPECT_TRUE(ovms::fileHasLfsKeywordsFirst3Positional(p)); +} + +class LibGit2MakeRelativeToBaseTest : public ::testing::Test { +protected: + TempDir td; +}; + +// Base is an ancestor of path → should return the relative tail. 
+TEST_F(LibGit2MakeRelativeToBaseTest, BaseIsAncestor) { + fs::path base = td.dir / "root"; + fs::path sub = base / "a" / "b" / "file.txt"; + + std::error_code ec; + fs::create_directories(sub.parent_path(), ec); + + fs::path rel = ovms::makeRelativeToBase(sub, base); + // Expected: "a/b/file.txt" (platform-correct separators) + EXPECT_EQ(rel, fs::path("a") / "b" / "file.txt"); +} + +// Path equals base → fs::relative returns "." (non-empty), we keep it. +TEST_F(LibGit2MakeRelativeToBaseTest, PathEqualsBase) { + fs::path base = td.dir / "same"; + std::error_code ec; + fs::create_directories(base, ec); + + fs::path rel = ovms::makeRelativeToBase(base, base); + EXPECT_EQ(rel, fs::path(".")); +} + +// Sibling subtree: base is ancestor of both; result is still relative path from base. +TEST_F(LibGit2MakeRelativeToBaseTest, SiblingSubtree) { + fs::path base = td.dir / "root2"; + fs::path a = base / "a" / "deep" / "fileA.txt"; + fs::path b = base / "b"; + + std::error_code ec; + fs::create_directories(a.parent_path(), ec); + fs::create_directories(b, ec); + + fs::path rel = ovms::makeRelativeToBase(a, base); + EXPECT_EQ(rel, fs::path("a") / "deep" / "fileA.txt"); +} + +// Base is not an ancestor but on same root → return a proper upward relative like "../x/y". 
+TEST_F(LibGit2MakeRelativeToBaseTest, BaseIsNotAncestorButSameRoot) { + fs::path base = td.dir / "top" / "left"; + fs::path path = td.dir / "top" / "right" / "x" / "y.txt"; + + std::error_code ec; + fs::create_directories(base, ec); + fs::create_directories(path.parent_path(), ec); + + fs::path rel = ovms::makeRelativeToBase(path, base); + // From .../top/left to .../top/right/x/y.txt → "../right/x/y.txt" + EXPECT_EQ(rel, fs::path("..") / "right" / "x" / "y.txt"); +} + +// Works even if paths do not exist (lexical computation should still yield a sensible result) +TEST_F(LibGit2MakeRelativeToBaseTest, NonExistingPathsLexicalStillWorks) { + fs::path base = td.dir / "ghost" / "base"; + fs::path path = td.dir / "ghost" / "base" / "sub" / "file.dat"; + // No directories created + + fs::path rel = ovms::makeRelativeToBase(path, base); + EXPECT_EQ(rel, fs::path("sub") / "file.dat"); +} + +// Last resort on Windows: different drive letters → fs::relative fails, +// lexically_relative returns empty → function should return filename only. +#ifdef _WIN32 +TEST_F(LibGit2MakeRelativeToBaseTest, DifferentDrivesReturnsFilenameOnly) { + // NOTE: We don't touch the filesystem; we only test the path logic. + // Choose typical drive letters; test won't fail if the drive doesn't exist + // because we don't access the filesystem in lexically_relative path. + fs::path path = fs::path("D:\\folder\\file.txt"); + fs::path base = fs::path("C:\\another\\base"); + + fs::path rel = ovms::makeRelativeToBase(path, base); + EXPECT_EQ(rel, fs::path("file.txt")); +} +#endif + +// If path has no filename (e.g., it's a root), last resort returns path itself. +// On POSIX, "/" has no filename; on Windows, "C:\\" has no filename either. 
+TEST_F(LibGit2MakeRelativeToBaseTest, NoFilenameEdgeCaseReturnsPathItself) { + fs::path base = td.dir; // arbitrary +#if defined(_WIN32) + // Construct a path that has no filename: root-name + root-directory + // We can't know the system drive at compile time; use a generic root directory. + // For the test, we simulate a root-only path lexically. + fs::path path = fs::path("C:\\"); // has no filename +#else + fs::path path = fs::path("../.."); // has no filename +#endif + + fs::path rel = ovms::makeRelativeToBase(path, base); + EXPECT_EQ(rel, path); +} + +static void mkdirs(const fs::path& p) { + std::error_code ec; + fs::create_directories(p, ec); +} + +class LibGit2FindLfsLikeFilesTest : public ::testing::Test { +protected: + TempDir td; + + // Utility: sort paths lexicographically for deterministic comparison + static void sortPaths(std::vector& v) { + std::sort(v.begin(), v.end(), [](const fs::path& a, const fs::path& b) { + return a.generic_string() < b.generic_string(); + }); + } +}; + +// --- Tests --- + +TEST_F(LibGit2FindLfsLikeFilesTest, NonExistingDirectoryReturnsEmpty) { + fs::path nonexist = td.dir / "does_not_exist"; + auto matches = ovms::findLfsLikeFiles(nonexist.string(), /*recursive=*/true); + EXPECT_TRUE(matches.empty()); +} + +TEST_F(LibGit2FindLfsLikeFilesTest, EmptyDirectoryReturnsEmpty) { + auto matches = ovms::findLfsLikeFiles(td.dir.string(), /*recursive=*/true); + EXPECT_TRUE(matches.empty()); +} + +TEST_F(LibGit2FindLfsLikeFilesTest, NonRecursiveFindsOnlyTopLevelMatches) { + // Layout: + // td.dir/ + // match_top.txt (should match) + // nomatch_top.txt (should not match) + // sub/ + // match_nested.txt (should match but NOT included in non-recursive) + // Matching condition: lines[0] contains "version", lines[1] contains "oid", lines[2] contains "size" + + // Create top-level files + writeFile(td.dir, "match_top.txt", + "version v1\n" + "oid sha256:abc\n" + "size 123\n"); + + writeFile(td.dir, "nomatch_top.txt", + "version v1\n" + "hash 
something\n" // missing "oid" on line 2 + "size 123\n"); + + // Create nested directory and file + fs::path sub = td.dir / "sub"; + mkdirs(sub); + writeFile(sub, "match_nested.txt", + " VERSION v1 \n" + "\toid: 123\n" + "size: 42\n"); + + auto matches = ovms::findLfsLikeFiles(td.dir.string(), /*recursive=*/false); + sortPaths(matches); + + std::vector expected = {fs::path("match_top.txt")}; + sortPaths(expected); + + EXPECT_EQ(matches, expected); +} + +TEST_F(LibGit2FindLfsLikeFilesTest, RecursiveFindsNestedMatches) { + // Same layout as previous test but recursive = true; should include nested match as relative path + writeFile(td.dir, "top_match.txt", + "version spec\n" + "oid hash\n" + "size 1\n"); + + fs::path sub = td.dir / "a" / "b"; + mkdirs(sub); + writeFile(sub, "nested_match.txt", + "VeRsIoN\n" + "OID x\n" + "SiZe y\n"); + + // Add a deeper non-match to ensure it is ignored + fs::path deeper = td.dir / "a" / "b" / "c"; + mkdirs(deeper); + writeFile(deeper, "deep_nomatch.txt", + "hello\n" + "world\n" + "!\n"); + + auto matches = ovms::findLfsLikeFiles(td.dir.string(), /*recursive=*/true); + sortPaths(matches); + + std::vector expected = { + fs::path("top_match.txt"), + fs::path("a") / "b" / "nested_match.txt"}; + sortPaths(expected); + + EXPECT_EQ(matches, expected); +} + +TEST_F(LibGit2FindLfsLikeFilesTest, MixedNewlinesInMatchingFilesAreHandled) { + // Requires underlying readFirstThreeLines + fileHasLfsKeywordsFirst3Positional to handle \r, \n, \r\n + writeFile(td.dir, "mixed1.txt", + "version one\r" + "oid two\n" + "size three\r\n"); + + auto matches = ovms::findLfsLikeFiles(td.dir.string(), /*recursive=*/false); + + ASSERT_EQ(matches.size(), 1u); + EXPECT_EQ(matches[0], fs::path("mixed1.txt")); +} + +TEST_F(LibGit2FindLfsLikeFilesTest, WrongOrderOrMissingKeywordsAreNotIncluded) { + writeFile(td.dir, "wrong_order.txt", + "size 1\n" + "version 2\n" + "oid 3\n"); // wrong order → should not match + + writeFile(td.dir, "missing_second.txt", + "version 
v1\n" + "hash something\n" // missing "oid" + "size 3\n"); + + auto matches = ovms::findLfsLikeFiles(td.dir.string(), /*recursive=*/false); + EXPECT_TRUE(matches.empty()); +} + +TEST_F(LibGit2FindLfsLikeFilesTest, OnlyRegularFilesConsidered) { + // Create a directory with LFS-like name to ensure it isn't treated as a file + fs::path lfsdir = td.dir / "version_oid_size_dir"; + mkdirs(lfsdir); + + // No files → nothing should match + auto matches = ovms::findLfsLikeFiles(td.dir.string(), /*recursive=*/true); + EXPECT_TRUE(matches.empty()); +} + +TEST_F(LibGit2FindLfsLikeFilesTest, ReturnsPathsRelativeToBaseDirectory) { + // Ensure results are made relative to the provided base dir. + writeFile(td.dir, "root_match.txt", + "version v\n" + "oid o\n" + "size s\n"); + fs::path sub = td.dir / "x" / "y"; + mkdirs(sub); + writeFile(sub, "nested_match.txt", + "version v\n" + "oid o\n" + "size s\n"); + + auto matches = ovms::findLfsLikeFiles(td.dir.string(), /*recursive=*/true); + sortPaths(matches); + + std::vector expected = { + fs::path("root_match.txt"), + fs::path("x") / "y" / "nested_match.txt"}; + sortPaths(expected); + + EXPECT_EQ(matches, expected); +} + +TEST_F(LibGit2FindLfsLikeFilesTest, NonRecursiveDoesNotDescendButStillUsesRelativePaths) { + fs::path sub = td.dir / "subdir"; + mkdirs(sub); + + writeFile(td.dir, "toplevel.txt", + "version a\n" + "oid b\n" + "size c\n"); + + writeFile(sub, "nested.txt", + "version a\n" + "oid b\n" + "size c\n"); + + auto matches_nonrec = ovms::findLfsLikeFiles(td.dir.string(), /*recursive=*/false); + auto matches_rec = ovms::findLfsLikeFiles(td.dir.string(), /*recursive=*/true); + + // Non-recursive: only top-level + ASSERT_EQ(matches_nonrec.size(), 1u); + EXPECT_EQ(matches_nonrec[0], fs::path("toplevel.txt")); + + // Recursive: both, relative to base dir + sortPaths(matches_rec); + std::vector expected = { + fs::path("toplevel.txt"), + fs::path("subdir") / "nested.txt"}; + sortPaths(expected); + EXPECT_EQ(matches_rec, expected); 
+} diff --git a/src/test/pull_hf_model_test.cpp b/src/test/pull_hf_model_test.cpp index b29bbee326..ba3d2143d6 100644 --- a/src/test/pull_hf_model_test.cpp +++ b/src/test/pull_hf_model_test.cpp @@ -14,6 +14,7 @@ // limitations under the License. //***************************************************************************** #include +#include #include #include @@ -39,6 +40,8 @@ #include "environment.hpp" +namespace fs = std::filesystem; + class HfDownloaderPullHfModel : public TestWithTempDir { protected: ovms::Server& server = ovms::Server::instance(); @@ -168,6 +171,161 @@ TEST_F(HfDownloaderPullHfModel, PositiveDownload) { ASSERT_EQ(expectedGraphContents, removeVersionString(graphContents)) << graphContents; } +// Truncate the file to half its size, keeping the first half. +bool removeSecondHalf(const std::string& filrStr) { + const fs::path& file(filrStr); + std::error_code ec; + ec.clear(); + + if (!fs::exists(file, ec) || !fs::is_regular_file(file, ec)) { + if (!ec) + ec = std::make_error_code(std::errc::no_such_file_or_directory); + return false; + } + + const std::uintmax_t size = fs::file_size(file, ec); + if (ec) + return false; + + const std::uintmax_t newSize = size / 2; // floor(size/2) + fs::resize_file(file, newSize, ec); + return !ec; +} + +bool createGitLfsPointerFile(const std::string& path) { + std::ofstream file(path, std::ios::binary); + if (!file.is_open()) { + return false; + } + + file << "version https://git-lfs.github.com/spec/v1\n" + "oid sha256:cecf0224201415144c00cf3a6cf3350306f9c78888d631eb590939a63722fefa\n" + "size 52417240\n"; + + return true; +} + +// Returns lowercase hex SHA-256 string on success, empty string on failure. 
+std::string sha256File(std::string_view path, std::error_code& ec) { + ec.clear(); + + std::ifstream ifs(std::string(path), std::ios::binary); + if (!ifs) { + ec = std::make_error_code(std::errc::no_such_file_or_directory); + return {}; + } + + SHA256_CTX ctx; + if (SHA256_Init(&ctx) != 1) { + ec = std::make_error_code(std::errc::io_error); + return {}; + } + + // Read in chunks to support large files without high memory usage. + std::vector buffer(1 << 20); // 1 MiB + while (ifs) { + ifs.read(reinterpret_cast(buffer.data()), static_cast(buffer.size())); + std::streamsize got = ifs.gcount(); + if (got > 0) { + if (SHA256_Update(&ctx, buffer.data(), static_cast(got)) != 1) { + ec = std::make_error_code(std::errc::io_error); + return {}; + } + } + } + if (!ifs.eof()) { // read failed not due to EOF + ec = std::make_error_code(std::errc::io_error); + return {}; + } + + std::array digest{}; + if (SHA256_Final(digest.data(), &ctx) != 1) { + ec = std::make_error_code(std::errc::io_error); + return {}; + } + + // Convert to lowercase hex + std::ostringstream oss; + oss << std::hex << std::setfill('0') << std::nouppercase; + for (unsigned char b : digest) { + oss << std::setw(2) << static_cast(b); + } + return oss.str(); +} + +class TestHfDownloader : public ovms::HfDownloader { +public: + TestHfDownloader(const std::string& sourceModel, const std::string& downloadPath, const std::string& hfEndpoint, const std::string& hfToken, const std::string& httpProxy, bool overwrite) : + HfDownloader(sourceModel, downloadPath, hfEndpoint, hfToken, httpProxy, overwrite) {} + std::string GetRepoUrl() { return HfDownloader::GetRepoUrl(); } + std::string GetRepositoryUrlWithPassword() { return HfDownloader::GetRepositoryUrlWithPassword(); } + bool CheckIfProxySet() { return HfDownloader::CheckIfProxySet(); } + const std::string& getEndpoint() { return this->hfEndpoint; } + const std::string& getProxy() { return this->httpProxy; } + std::string getGraphDirectory(const std::string& 
downloadPath, const std::string& sourceModel) { return IModelDownloader::getGraphDirectory(downloadPath, sourceModel); } + std::string getGraphDirectory() { return HfDownloader::getGraphDirectory(); } + ovms::Status CheckRepositoryStatus(bool checkUntracked) { return HfDownloader::CheckRepositoryStatus(checkUntracked); } +}; + +TEST_F(HfDownloaderPullHfModel, Resume) { + std::string modelName = "OpenVINO/Phi-3-mini-FastDraft-50M-int8-ov"; + std::string downloadPath = ovms::FileSystem::joinPath({this->directoryPath, "repository"}); + std::string task = "text_generation"; + this->ServerPullHfModel(modelName, downloadPath, task); + server.setShutdownRequest(1); + if (t) + t->join(); + server.setShutdownRequest(0); + + std::string ovModelName = "openvino_model.bin"; + std::string basePath = ovms::FileSystem::joinPath({this->directoryPath, "repository", "OpenVINO", "Phi-3-mini-FastDraft-50M-int8-ov"}); + std::string modelPath = ovms::FileSystem::appendSlash(basePath) + ovModelName; + std::string graphPath = ovms::FileSystem::appendSlash(basePath) + "graph.pbtxt"; + + ASSERT_EQ(std::filesystem::exists(modelPath), true) << modelPath; + ASSERT_EQ(std::filesystem::exists(graphPath), true) << graphPath; + ASSERT_EQ(std::filesystem::file_size(modelPath), 52417240); + std::string graphContents = GetFileContents(graphPath); + + ASSERT_EQ(expectedGraphContents, removeVersionString(graphContents)) << graphContents; + + // Check status function + std::unique_ptr hfDownloader = std::make_unique(modelName, ovms::IModelDownloader::getGraphDirectory(downloadPath, modelName), "", "", "", false); + + // Fails because we want clean and it has the graph.pbtxt after download + ASSERT_EQ(hfDownloader->CheckRepositoryStatus(true).getCode(), ovms::StatusCode::HF_GIT_STATUS_UNCLEAN); + + std::error_code ec; + ec.clear(); + std::string expectedDigest = sha256File(modelPath, ec); + ASSERT_EQ(ec, std::errc()); + // Prepare a git repository with a lfs_part file and lfs pointer file to simulate 
partial download error of a big model + ASSERT_EQ(removeSecondHalf(modelPath), true); + ASSERT_EQ(std::filesystem::file_size(modelPath), 26208620); + + std::string ovModelPartLfsName = "openvino_model.binlfs_part"; + std::string ovModelPartLfsPath = ovms::FileSystem::appendSlash(basePath) + ovModelPartLfsName; + fs::rename(modelPath, ovModelPartLfsPath, ec); + ASSERT_EQ(ec, std::errc()); + ASSERT_EQ(std::filesystem::file_size(ovModelPartLfsPath), 26208620); + ASSERT_EQ(createGitLfsPointerFile(modelPath), true); + + // Call ovms pull to resume the file + this->ServerPullHfModel(modelName, downloadPath, task); + + ASSERT_EQ(std::filesystem::exists(ovModelPartLfsPath), false) << modelPath; + ASSERT_EQ(std::filesystem::exists(modelPath), true) << modelPath; + ASSERT_EQ(std::filesystem::exists(graphPath), true) << graphPath; + ASSERT_EQ(std::filesystem::file_size(modelPath), 52417240); + graphContents = GetFileContents(graphPath); + + ASSERT_EQ(expectedGraphContents, removeVersionString(graphContents)) << graphContents; + + std::string resumedDigest = sha256File(modelPath, ec); + ASSERT_EQ(ec, std::errc()); + ASSERT_EQ(expectedDigest, resumedDigest); +} + TEST_F(HfDownloaderPullHfModel, PositiveDownloadAndStart) { SKIP_AND_EXIT_IF_NOT_RUNNING_UNSTABLE(); // CVS-180127 // EnvGuard guard; @@ -301,19 +459,6 @@ class TestOptimumDownloader : public ovms::OptimumDownloader { bool checkIfTokenizerFileIsExported() { return ovms::OptimumDownloader::checkIfTokenizerFileIsExported(); } }; -class TestHfDownloader : public ovms::HfDownloader { -public: - TestHfDownloader(const std::string& sourceModel, const std::string& downloadPath, const std::string& hfEndpoint, const std::string& hfToken, const std::string& httpProxy, bool overwrite) : - HfDownloader(sourceModel, downloadPath, hfEndpoint, hfToken, httpProxy, overwrite) {} - std::string GetRepoUrl() { return HfDownloader::GetRepoUrl(); } - std::string GetRepositoryUrlWithPassword() { return 
HfDownloader::GetRepositoryUrlWithPassword(); } - bool CheckIfProxySet() { return HfDownloader::CheckIfProxySet(); } - const std::string& getEndpoint() { return this->hfEndpoint; } - const std::string& getProxy() { return this->httpProxy; } - std::string getGraphDirectory(const std::string& downloadPath, const std::string& sourceModel) { return IModelDownloader::getGraphDirectory(downloadPath, sourceModel); } - std::string getGraphDirectory() { return HfDownloader::getGraphDirectory(); } -}; - TEST(HfDownloaderClassTest, Methods) { std::string modelName = "model/name"; std::string downloadPath = "/path/to/Download"; @@ -337,6 +482,32 @@ TEST(HfDownloaderClassTest, Methods) { ASSERT_EQ(hfDownloader->getGraphDirectory(), expectedPath); } +TEST(HfDownloaderClassTest, RepositoryStatusCheckErrors) { + std::string modelName = "model/name"; + std::string downloadPath = "/path/to/Download"; + std::string hfEndpoint = "www.new_hf.com/"; + std::string hfToken = "123$$o_O123!AAbb"; + std::string httpProxy = "https://proxy_test1:123"; + std::unique_ptr hfDownloader = std::make_unique(modelName, ovms::IModelDownloader::getGraphDirectory(downloadPath, modelName), hfEndpoint, hfToken, httpProxy, false); + + // Fails without libgit init + ASSERT_EQ(hfDownloader->CheckRepositoryStatus(true).getCode(), ovms::StatusCode::HF_GIT_LIGIT2_NOT_INITIALIZED); + ASSERT_EQ(hfDownloader->CheckRepositoryStatus(false).getCode(), ovms::StatusCode::HF_GIT_LIGIT2_NOT_INITIALIZED); + + auto guardOrError = ovms::createGuard(); + ASSERT_EQ(std::holds_alternative(guardOrError), false); + + // Path does not exist + ASSERT_EQ(hfDownloader->CheckRepositoryStatus(true).getCode(), ovms::StatusCode::HF_GIT_STATUS_FAILED_TO_RESOLVE_PATH); + ASSERT_EQ(hfDownloader->CheckRepositoryStatus(false).getCode(), ovms::StatusCode::HF_GIT_STATUS_FAILED_TO_RESOLVE_PATH); + + // Path not a git repository + downloadPath = getGenericFullPathForSrcTest("/tmp/"); + std::unique_ptr existingHfDownloader = 
std::make_unique(modelName, downloadPath, hfEndpoint, hfToken, httpProxy, false); + ASSERT_EQ(existingHfDownloader->CheckRepositoryStatus(true).getCode(), ovms::StatusCode::HF_GIT_STATUS_FAILED); + ASSERT_EQ(existingHfDownloader->CheckRepositoryStatus(false).getCode(), ovms::StatusCode::HF_GIT_STATUS_FAILED); +} + class TestOptimumDownloaderSetup : public ::testing::Test { public: ovms::HFSettingsImpl inHfSettings; @@ -588,6 +759,53 @@ class HfDownloaderHfEnvTest : public ::testing::Test { EnvGuard guard; }; +TEST(Libgt2InitGuardTest, LfsFilterCaptureDefaultResumeOptions) { + // Need new process beacase we use INIT_ONCE in libgit2 lfs filter for env variables and once they are set they are set for the whole process lifetime + EXPECT_EXIT({ + // Act: capture stdout during object construction + testing::internal::CaptureStdout(); + { + auto guardOrError = ovms::createGuard(); + ASSERT_EQ(std::holds_alternative(guardOrError), false); + } + std::string output = testing::internal::GetCapturedStdout(); + + // Optional: trim trailing newline + if (!output.empty() && output.back() == '\n') { + output.pop_back(); + } + + EXPECT_THAT(output, ::testing::HasSubstr("[INFO] LFS resume: attempts=5 interval=10 s")); + exit(0); + }, + ::testing::ExitedWithCode(0), ""); +} + +TEST(Libgt2InitGuardTest, LfsFilterCaptureNonDefaultResumeOptions) { + // Need new process beacase we use INIT_ONCE in libgit2 lfs filter for env variables and once they are set they are set for the whole process lifetime + EXPECT_EXIT({ + EnvGuard guard; + guard.set("GIT_LFS_RESUME_ATTEMPTS", "3"); + guard.set("GIT_LFS_RESUME_INTERVAL", "20"); + // Act: capture stdout during object construction + testing::internal::CaptureStdout(); + { + auto guardOrError = ovms::createGuard(); + ASSERT_EQ(std::holds_alternative(guardOrError), false); + } + std::string output = testing::internal::GetCapturedStdout(); + + // Optional: trim trailing newline + if (!output.empty() && output.back() == '\n') { + output.pop_back(); + 
} + + EXPECT_THAT(output, ::testing::HasSubstr("[INFO] LFS resume: attempts=3 interval=20 s")); + exit(0); + }, + ::testing::ExitedWithCode(0), ""); +} + TEST_F(HfDownloaderHfEnvTest, Methods) { std::string modelName = "model/name"; std::string downloadPath = "/path/to/Download"; diff --git a/third_party/libgit2/lfs.patch b/third_party/libgit2/lfs.patch index 6139df9e44..ffa4cb7f43 100644 --- a/third_party/libgit2/lfs.patch +++ b/third_party/libgit2/lfs.patch @@ -24,6 +24,56 @@ index 31da49a88..d61c9735e 100644 if(BUILD_EXAMPLES) add_subdirectory(examples) endif() +diff --git a/cmake/ExperimentalFeatures.cmake b/cmake/ExperimentalFeatures.cmake +index 7eff40bdb..5562acc77 100644 +--- a/cmake/ExperimentalFeatures.cmake ++++ b/cmake/ExperimentalFeatures.cmake +@@ -18,6 +18,3 @@ else() + add_feature_info("SHA256 API" OFF "experimental SHA256 APIs") + endif() + +-if(EXPERIMENTAL) +- set(LIBGIT2_FILENAME "${LIBGIT2_FILENAME}-experimental") +-endif() +diff --git a/include/git2/oid.h b/include/git2/oid.h +index 0af9737a0..6d9a8b08a 100644 +--- a/include/git2/oid.h ++++ b/include/git2/oid.h +@@ -22,14 +22,8 @@ GIT_BEGIN_DECL + + /** The type of object id. */ + typedef enum { +- +-#ifdef GIT_EXPERIMENTAL_SHA256 + GIT_OID_SHA1 = 1, /**< SHA1 */ + GIT_OID_SHA256 = 2 /**< SHA256 */ +-#else +- GIT_OID_SHA1 = 1 /**< SHA1 */ +-#endif +- + } git_oid_t; + + /* +diff --git a/include/git2/repository.h b/include/git2/repository.h +index b203576af..26309dd3f 100644 +--- a/include/git2/repository.h ++++ b/include/git2/repository.h +@@ -184,6 +184,15 @@ GIT_EXTERN(int) git_repository_open_ext( + unsigned int flags, + const char *ceiling_dirs); + ++/** ++ * Set repository url member ++ * ++ * ++ * @param repo repository handle to update. If NULL nothing occurs. ++ * @param url the remote repository to clone or run checkout against. ++ */ ++GIT_EXTERN(int) git_repository_set_url(git_repository *repo, const char *url); ++ + /** + * Open a bare repository on the serverside. 
+ * diff --git a/include/git2/sys/filter.h b/include/git2/sys/filter.h index 60466d173..a35ad5f98 100644 --- a/include/git2/sys/filter.h @@ -61,9 +111,21 @@ index d121c588a..b54a01a4b 100644 # diff --git a/src/cli/cmd_clone.c b/src/cli/cmd_clone.c -index c18cb28d4..286fa7153 100644 +index c18cb28d4..9f89cd1b3 100644 --- a/src/cli/cmd_clone.c +++ b/src/cli/cmd_clone.c +@@ -4,9 +4,9 @@ + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. + */ +- +-#include + #include ++#include ++ + #include "common.h" + #include "cmd.h" + #include "error.h" @@ -146,6 +146,7 @@ int cmd_clone(int argc, char **argv) clone_opts.bare = !!bare; clone_opts.checkout_branch = branch; @@ -72,6 +134,76 @@ index c18cb28d4..286fa7153 100644 if (!checkout) clone_opts.checkout_opts.checkout_strategy = GIT_CHECKOUT_NONE; +@@ -182,6 +183,69 @@ int cmd_clone(int argc, char **argv) + + cli_progress_finish(&progress); + ++ // Code below for testing resume in native libgit2 ++ git_repository *repo2 = NULL; ++ int error = git_repository_open_ext(&repo2, local_path, 0, NULL); ++ // HEAD state info ++ bool is_detached = git_repository_head_detached(repo2) == 1; ++ bool is_unborn = git_repository_head_unborn(repo2) == 1; ++ ++ // Collect status (staged/unstaged/untracked) ++ git_status_options opts = GIT_STATUS_OPTIONS_INIT; ++ ++ opts.show = GIT_STATUS_SHOW_INDEX_AND_WORKDIR; ++ opts.flags = GIT_STATUS_OPT_INCLUDE_UNTRACKED // include untracked files ++ // // | ++ // GIT_STATUS_OPT_RENAMES_HEAD_TO_INDEX ++ // // detect renames ++ // HEAD->index - not ++ // required currently and ++ // impacts performance ++ | GIT_STATUS_OPT_SORT_CASE_SENSITIVELY; ++ ++ git_status_list *status_list = NULL; ++ ret = git_status_list_new(&status_list, repo2, &opts); ++ ++ size_t staged = 0, unstaged = 0, untracked = 0, conflicted = 0; ++ const size_t n = git_status_list_entrycount(status_list); ++ ++ for (size_t i = 0; i < n; ++i) { ++ 
const git_status_entry *e = git_status_byindex(status_list, i); ++ if (!e) ++ continue; ++ unsigned s = e->status; ++ ++ // Staged (index) changes ++ if (s & (GIT_STATUS_INDEX_NEW | GIT_STATUS_INDEX_MODIFIED | ++ GIT_STATUS_INDEX_DELETED | GIT_STATUS_INDEX_RENAMED | ++ GIT_STATUS_INDEX_TYPECHANGE)) ++ ++staged; ++ ++ // Unstaged (workdir) changes ++ if (s & (GIT_STATUS_WT_MODIFIED | GIT_STATUS_WT_DELETED | ++ GIT_STATUS_WT_RENAMED | GIT_STATUS_WT_TYPECHANGE)) ++ ++unstaged; ++ ++ // Untracked ++ if (s & GIT_STATUS_WT_NEW) ++ ++untracked; ++ ++ // Conflicted ++ if (s & GIT_STATUS_CONFLICTED) ++ ++conflicted; ++ } ++ ++ // Print summary (mirrors your original stream output) ++ printf("HEAD state : %s\n", ++ is_unborn ? "unborn (no commits)" : ++ (is_detached ? "detached" : "attached")); ++ printf("Staged changes : %zu\n", staged); ++ printf("Unstaged changes: %zu\n", unstaged); ++ printf("Untracked files : %zu", untracked); ++ if (conflicted) { ++ printf(" (%zu paths flagged)", conflicted); ++ } ++ printf("\n"); + done: + cli_progress_dispose(&progress); + git__free(computed_path); diff --git a/src/cli/progress.h b/src/cli/progress.h index f08d68f19..0344304ec 100644 --- a/src/cli/progress.h @@ -312,11 +444,11 @@ index 58cb4b424..00ddee9f3 100644 git_writestream **out, diff --git a/src/libgit2/lfs_filter.c b/src/libgit2/lfs_filter.c new file mode 100644 -index 000000000..484811a0c +index 000000000..3d77e9493 --- /dev/null +++ b/src/libgit2/lfs_filter.c -@@ -0,0 +1,567 @@ -+/* +@@ -0,0 +1,1513 @@ ++/* +/ Copyright 2025 Intel Corporation +/ +/ Licensed under the Apache License, Version 2.0 (the "License"); @@ -332,72 +464,477 @@ index 000000000..484811a0c +/ limitations under the License. 
+*/ + ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ +#include "array.h" +#include "common.h" -+ -+#include +#include "git2/sys/filter.h" ++#include "hash.h" ++#include "oid.h" +#include "filter.h" +#include "str.h" +#include "repository.h" +#include "regexp.h" +#include "time.h" + -+typedef struct lfs_attrs ++#define LFS_RESUME_ATTEMPTS_DEFAULT 5 ++#define LFS_RESUME_ATTEMPTS_MAX 100 ++#define LFS_RESUME_INTERVAL_DEFAULT 10 ++#define LFS_RESUME_INTERVAL_MAX 3600 ++ ++/* Configure how many resume attempts and how long to wait between them */ ++static int g_lfs_resume_attempts = 5; /* <-- make configurable */ ++static unsigned int g_lfs_resume_interval_secs = 10; /* <-- make configurable */ ++ ++/* ++ * parse_env_nonneg_int ++ * --------------------- ++ * Parses an environment variable as a non‑negative integer. ++ * ++ * Parameters: ++ * env_name - Name of the environment variable to read. ++ * default_value - Value to use if the env variable is missing/invalid. ++ * min_value - Minimum allowed integer value. ++ * max_value - Maximum allowed integer value. ++ * out_value - Output pointer where parsed/clamped value is stored. ++ * ++ * Returns: ++ * 0 on success (or default fallback), ++ * -1 if parsing fails but default is used. 
++ */ ++static int parse_env_nonneg_int( ++ const char *env_name, ++ int default_value, ++ int min_value, ++ int max_value, ++ int *out_value) +{ -+ const char *path; -+ const char* full_path; -+ const char* workdir; -+ const char *lfs_oid; -+ const char *lfs_size; -+ const char *url; ++ char *end; ++ unsigned long long val; ++ int ival; ++ const char *s = getenv(env_name); ++ if (!s || !*s) { ++ *out_value = default_value; ++ return 0; ++ } ++ ++ /* Trim leading spaces */ ++ while (isspace((unsigned char)*s)) ++ s++; ++ ++ errno = 0; ++ end = NULL; ++ val = strtoull(s, &end, 10); ++ ++ if (errno == ERANGE || end == s) { ++ fprintf(stderr, "[WARN] %s: invalid number, using default=%d\n", ++ env_name, default_value); ++ *out_value = default_value; ++ return -1; ++ } ++ /* Check for trailing junk */ ++ while (isspace((unsigned char)*end)) ++ end++; ++ if (*end != '\0') { ++ fprintf(stderr, ++ "[WARN] %s: trailing characters ignored, using default=%d\n", ++ env_name, default_value); ++ *out_value = default_value; ++ return -1; ++ } ++ ++ if (val > (unsigned long long)INT_MAX) { ++ fprintf(stderr, "[WARN] %s: value too large, capping to %d\n", ++ env_name, max_value); ++ val = (unsigned long long)max_value; ++ } ++ ++ ival = (int)val; ++ if (ival < min_value) ++ ival = min_value; ++ if (ival > max_value) ++ ival = max_value; ++ ++ *out_value = ival; ++ return 0; ++} ++ ++/* ++ * parse_env_nonneg_uint ++ * ---------------------- ++ * Same as parse_env_nonneg_int but operates on unsigned integers. ++ * ++ * Parameters: ++ * env_name - Environment variable name. ++ * default_value - Default unsigned value. ++ * min_value - Minimum allowed uint. ++ * max_value - Maximum allowed uint. ++ * out_value - Output pointer for parsed value. ++ * ++ * Returns: ++ * 0 on success (or default fallback); ++ * -1 on parse failure. 
++ */ ++static int parse_env_nonneg_uint( ++ const char *env_name, ++ unsigned int default_value, ++ unsigned int min_value, ++ unsigned int max_value, ++ unsigned int *out_value) ++{ ++ char *end; ++ unsigned long long val; ++ unsigned int uval; ++ const char *s = getenv(env_name); ++ if (!s || !*s) { ++ *out_value = default_value; ++ return 0; ++ } ++ ++ while (isspace((unsigned char)*s)) ++ s++; ++ ++ errno = 0; ++ end = NULL; ++ val = strtoull(s, &end, 10); ++ ++ if (errno == ERANGE || end == s) { ++ fprintf(stderr, "[WARN] %s: invalid number, using default=%u\n", ++ env_name, default_value); ++ *out_value = default_value; ++ return -1; ++ } ++ while (isspace((unsigned char)*end)) ++ end++; ++ if (*end != '\0') { ++ fprintf(stderr, ++ "[WARN] %s: trailing characters ignored, using default=%u\n", ++ env_name, default_value); ++ *out_value = default_value; ++ return -1; ++ } ++ ++ if (val > (unsigned long long)UINT_MAX) ++ val = (unsigned long long)UINT_MAX; ++ ++ uval = (unsigned int)val; ++ if (uval < min_value) ++ uval = min_value; ++ if (uval > max_value) ++ uval = max_value; ++ ++ *out_value = uval; ++ return 0; ++} ++ ++/* ++ * lfs_resume_env_init ++ * -------------------- ++ * Initializes global resume configuration from environment: ++ * GIT_LFS_RESUME_ATTEMPTS ++ * GIT_LFS_RESUME_INTERVAL ++ * Initializes libcurl and set ups curl cleanup once per process ++ * ++ * Called exactly once via pthread_once / InitOnce. 
++ */ ++static void lfs_resume_env_init(void) ++{ ++ /* initialize curl once per process */ ++ curl_global_init(CURL_GLOBAL_ALL); ++ /* register cleanup once */ ++ atexit(curl_global_cleanup); ++ ++ parse_env_nonneg_int( ++ "GIT_LFS_RESUME_ATTEMPTS", LFS_RESUME_ATTEMPTS_DEFAULT, 0, ++ LFS_RESUME_ATTEMPTS_MAX, &g_lfs_resume_attempts); ++ ++ parse_env_nonneg_uint( ++ "GIT_LFS_RESUME_INTERVAL", LFS_RESUME_INTERVAL_DEFAULT, 0, ++ LFS_RESUME_INTERVAL_MAX, &g_lfs_resume_interval_secs); ++ ++ /* log resolved config */ ++ fprintf(stdout, "[INFO] LFS resume: attempts=%d interval=%u s\n", ++ g_lfs_resume_attempts, g_lfs_resume_interval_secs); ++} ++ ++#ifdef _WIN32 ++#include ++#define fseeko _fseeki64 ++#define ftello _ftelli64 ++/* ++ * sleep_seconds ++ * -------------- ++ * Cross-platform helper to sleep for a given number of seconds. ++ * ++ * Parameters: ++ * seconds - Number of seconds to sleep. ++ */ ++static void sleep_seconds(unsigned int seconds) ++{ ++ Sleep(seconds * 1000); ++} ++/* ++ * lfs_once_cb_win / lfs_once_cb_posix ++ * ----------------------------------- ++ * One-time initialization wrapper for Windows/POSIX. 
++ */ ++#define LFS_ONCE_INIT INIT_ONCE_STATIC_INIT ++static INIT_ONCE lfs_once = LFS_ONCE_INIT; ++static BOOL CALLBACK lfs_once_cb_win(PINIT_ONCE once, PVOID param, PVOID *ctx) ++{ ++ (void)once; ++ (void)param; ++ (void)ctx; ++ lfs_resume_env_init(); ++ return TRUE; ++} ++#elif defined(__ANDROID__) ++/* Android may require _FILE_OFFSET_BITS = 64 and proper headers */ ++#else ++/* POSIX systems (Linux, macOS) */ ++#include ++#include ++static pthread_once_t lfs_once = PTHREAD_ONCE_INIT; ++static void lfs_once_cb_posix(void) ++{ ++ lfs_resume_env_init(); ++} ++static void sleep_seconds(unsigned int seconds) ++{ ++ sleep(seconds); ++} ++#endif ++ ++ ++typedef struct lfs_attrs { ++ char *path; ++ char *full_path; ++ char *workdir; ++ char *lfs_oid; ++ char *lfs_size; ++ char *url; ++ bool is_download; +} lfs_attrs; + -+static size_t get_digit(const char *buffer) ++/* ++ * Allocate a clean, fully-owned structure. ++ */ ++lfs_attrs *lfs_attrs_new(void) ++{ ++ lfs_attrs *a = git__calloc(1, sizeof(lfs_attrs)); ++ return a; /* fields are NULL by calloc */ ++} ++ ++/* Internal helper: replaces a->field with strdup(value). 
*/ ++static int lfs_attrs_replace(char **field, const char *value) ++{ ++ char *dup = NULL; ++ ++ if (value) { ++ dup = git__strdup(value); ++ if (!dup) ++ return -1; ++ } ++ ++ /* Free old field */ ++ git__free(*field); ++ *field = dup; ++ return 0; ++} ++ ++int lfs_attrs_set_path(lfs_attrs *a, const char *path) ++{ ++ return lfs_attrs_replace(&a->path, path); ++} ++ ++int lfs_attrs_set_full_path(lfs_attrs *a, const char *fp) ++{ ++ return lfs_attrs_replace(&a->full_path, fp); ++} ++ ++int lfs_attrs_set_workdir(lfs_attrs *a, const char *wd) ++{ ++ return lfs_attrs_replace(&a->workdir, wd); ++} ++ ++int lfs_attrs_set_oid(lfs_attrs *a, const char *oid) ++{ ++ return lfs_attrs_replace(&a->lfs_oid, oid); ++} ++ ++int lfs_attrs_set_size(lfs_attrs *a, const char *size) ++{ ++ return lfs_attrs_replace(&a->lfs_size, size); ++} ++ ++int lfs_attrs_set_url(lfs_attrs *a, const char *url) ++{ ++ return lfs_attrs_replace(&a->url, url); ++} ++ ++/* ++ * Frees heap-allocated strings referenced by the struct fields. ++ * Parameters: ++ * a - Pointer to lfs_attrs struct. Not freed itself. ++ * Ownership assumption: ++ * - Only call free() on fields that point to heap memory ++ * (malloc/calloc/realloc/strdup). ++ * - If any field is borrowed (e.g., string literal or external buffer), set ++ * it to NULL before calling this function to avoid invalid free(). ++ */ ++void lfs_attrs_free(lfs_attrs *a) ++{ ++ if (!a) ++ return; ++ ++ git__free(a->path); ++ git__free(a->full_path); ++ git__free(a->workdir); ++ git__free(a->lfs_oid); ++ git__free(a->lfs_size); ++ git__free(a->url); ++ ++ a->path = NULL; ++ a->full_path = NULL; ++ a->workdir = NULL; ++ a->lfs_oid = NULL; ++ a->lfs_size = NULL; ++ a->url = NULL; ++ ++ a->is_download = false; ++} ++ ++/* ++ * lfs_attrs_delete ++ * ----------------- ++ * Frees both the struct fields and the struct itself. ++ * ++ * Parameters: ++ * a - Heap-allocated lfs_attrs struct. 
++ */ ++void lfs_attrs_delete(lfs_attrs *a) ++{ ++ if (!a) ++ return; ++ lfs_attrs_free(a); ++ git__free(a); ++} ++ ++/* ++ * get_digit ++ * ---------- ++ * Parses a decimal number from a C string using strtoull. ++ * ++ * Parameters: ++ * buffer - The C string containing digits. ++ * ++ * Returns: ++ * The parsed integer on success, ++ * 0 on failure. ++ */ ++static unsigned long long get_digit(const char *buffer) +{ + char *endptr; ++ unsigned long long number; + errno = 0; -+ size_t number = strtoull(buffer, &endptr, 10); ++ if (buffer == NULL) { ++ fprintf(stderr, "\n[ERROR] get_digit on NULL\n"); ++ return 0; ++ } ++ ++ number = strtoull(buffer, &endptr, 10); + + if (errno == ERANGE) { -+ fprintf(stderr, "Conversion error\n"); ++ fprintf(stderr, "\n[ERROR] Conversion error\n"); + } + if (endptr == buffer) { -+ fprintf(stderr, "No digits were found\n"); ++ fprintf(stderr, "\n[ERROR] No digits were found\n"); + } else if (*endptr != '\0') { -+ fprintf(stderr, "Additional characters after number: %s\n", endptr); ++ fprintf(stderr, ++ "\n[ERROR] Additional characters after number: %s\n", ++ endptr); + } + + return number; +} + -+char *append_char_to_buffer(char *existingBuffer, char additionalChar) ++/* ++ * append_cstr_to_buffer ++ * ---------------------- ++ * Allocates a new string consisting of existingBuffer + suffix. ++ * ++ * Parameters: ++ * existingBuffer - Original string. ++ * suffix - String to append. ++ * ++ * Returns: ++ * Newly allocated concatenated string, or NULL on failure. ++ * ++ * Caller must free result. 
++ */ ++static char * ++append_cstr_to_buffer(const char *existingBuffer, const char *suffix) +{ -+ size_t existingLength = strlen(existingBuffer); -+ char *newBuffer = (char *)malloc((existingLength + 2) * sizeof(char)); ++ size_t existingLength; ++ size_t suffixLength; ++ size_t newSize; ++ char *newBuffer; ++ if (existingBuffer == NULL || suffix == NULL) { ++ return NULL; ++ } ++ ++ existingLength = strlen(existingBuffer); ++ suffixLength = strlen(suffix); ++ ++ /* +1 for the null terminator */ ++ newSize = existingLength + suffixLength + 1; ++ ++ newBuffer = (char *)malloc(newSize); + if (newBuffer == NULL) { + return NULL; + } -+ strcpy(newBuffer, existingBuffer); -+ newBuffer[existingLength] = additionalChar; -+ newBuffer[existingLength + 1] = '\0'; ++ ++ /* Copy existing and then append suffix */ ++ memcpy(newBuffer, existingBuffer, existingLength); ++ memcpy(newBuffer + existingLength, suffix, suffixLength); ++ newBuffer[newSize - 1] = '\0'; ++ + return newBuffer; +} + -+int get_lfs_info_match( -+ const git_str *output, -+ const char *regexp) ++/* ++ * get_lfs_info_match ++ * ------------------- ++ * Applies a regex to a git_str buffer and extracts the matched substring. ++ * ++ * Parameters: ++ * output - git_str to modify in place (trimmed to match). ++ * regexp - Regular expression to apply. ++ * ++ * Returns: ++ * 0 if match found, ++ * -1 otherwise. ++ */ ++static int get_lfs_info_match(git_str *output, const char *regexp) +{ + int result; -+ git_regexp preg = GIT_REGEX_INIT; ++ git_regexp preg; ++ size_t i; ++ git_regmatch pmatch[2]; ++ ++ preg = GIT_REGEX_INIT; + if ((result = git_regexp_compile(&preg, regexp, 0)) < 0) { + git_regexp_dispose(&preg); + return result; + } + -+ size_t i; -+ git_regmatch pmatch[2]; -+ + if (!git_regexp_search(&preg, output->ptr, 2, pmatch)) { + /* use pmatch data to trim line data */ + i = (pmatch[1].start >= 0) ? 
1 : 0; @@ -412,11 +949,208 @@ index 000000000..484811a0c + return -1; +} + ++/* ++ * git_oid_sha256_from_git_str_blob ++ * --------------------------------- ++ * Computes SHA‑256 of a git_str blob and optionally formats: ++ * "oid sha256:" ++ * ++ * Parameters: ++ * out - git_oid output. ++ * input - git_str containing file contents. ++ * pointer_line - Optional output buffer for formatted oid line. ++ * pointer_line_cap- Capacity of pointer_line. ++ * ++ * Returns: ++ * 0 on success, ++ * -1 on error. ++ */ ++static int git_oid_sha256_from_git_str_blob( ++ git_oid *out, ++ const struct git_str *input, ++ char *pointer_line, ++ size_t pointer_line_cap) ++{ ++ git_hash_ctx ctx; ++ size_t CHUNK; ++ unsigned char *p; ++ size_t remaining; ++ ++ if (!out || !input || !input->ptr) { ++ return -1; ++ } ++ ++ if (!pointer_line || ++ pointer_line_cap < (size_t)(strlen("oid sha256:") + 64 + 1)) { ++ return -1; ++ } ++ ++ /* 1) Init SHA-256 hashing context (internal API) */ ++ if (git_hash_ctx_init(&ctx, GIT_HASH_ALGORITHM_SHA256) < 0) { ++ fprintf(stderr, "\n[ERROR] git_hash_ctx_init failed\n"); ++ goto error; ++ } ++ ++ /* 2) Stream the payload in chunks — hash *only* the file bytes. */ ++ CHUNK = 4 * 1024 * 1024; /* 4 MiB */ ++ p = (unsigned char *)input->ptr; ++ remaining = input->size; ++ ++ while (remaining > 0) { ++ size_t n = remaining > CHUNK ? CHUNK : remaining; ++ if (git_hash_update(&ctx, p, n) < 0) { ++ fprintf(stderr, "\n[ERROR] git_hash_update failed\n"); ++ goto error; ++ } ++ p += n; ++ remaining -= n; ++ } ++ ++ /* 3) Finalize into git_oid (32-byte raw digest for SHA-256). */ ++ if (git_hash_final(out->id, &ctx) < 0) { ++ fprintf(stderr, "\n[ERROR] git_hash_final failed\n"); ++ goto error; ++ } ++ ++ /* 4) Optionally format "oid sha256:" for the LFS pointer file. */ ++ if (pointer_line && ++ pointer_line_cap >= (size_t)(strlen("oid sha256:") + 64 + 1)) { ++ char hex[64 + 1]; ++ /* Formats full hex; no NUL added. 
*/ ++ if (git_oid_fmt(hex, out) < 0) { ++ fprintf(stderr, ++ "\n[ERROR] failure, git_oid_fmt failed\n"); ++ goto error; ++ } ++ ++ hex[64] = '\0'; ++ snprintf(pointer_line, pointer_line_cap, "oid sha256:%s", hex); ++ } ++ ++ git_hash_ctx_cleanup(&ctx); ++ return 0; ++error: ++ git_hash_ctx_cleanup(&ctx); ++ return -1; ++} ++ ++/* ++ * lfs_remove_id ++ * -------------- ++ * Converts original file content into LFS pointer file: ++ * version ... ++ * oid sha256:... ++ * size ... ++ * ++ * Used in "clean" filter (upload or diff). ++ * ++ * Parameters: ++ * to - Output git_str pointer file. ++ * from - Input git_str original file. ++ * payload - Output payload with initialized lfs_attrs (minimal). ++ * ++ * Returns: ++ * 0 on success, negative on error. ++ */ ++static int lfs_remove_id(git_str *to, const git_str *from, void **payload) ++{ ++ int error = 0; ++ char line[80]; /* 75+ is enough */ ++ git_oid lfs_oid; ++ /* Init the lfs attrs to indicate git lfs clean, currently only diff ++ * support no upload of lfs file supported */ ++ lfs_attrs *la = lfs_attrs_new(); ++ if (!la) ++ return -1; ++ la->is_download = false; ++ ++ *payload = la; ++ if (!from) ++ return -1; ++ ++ /* lfs spec - return empty pointer when the file is empty */ ++ if (from->size == 0) { ++ git_str_init(to, 0); ++ return 0; ++ } ++ ++ /* Use lib git oid to get lfs sha256 */ ++ lfs_oid.type = GIT_OID_SHA256; ++ if (git_oid_sha256_from_git_str_blob( ++ &lfs_oid, from, line, sizeof(line)) < 0) { ++ fprintf(stderr, "\n[ERROR] failure, cannot calculate sha256\n"); ++ return -1; ++ } ++ ++ git_str_init(to, 0); ++ ++ /* 1) version line (LFS spec requires this literal string) */ ++ if ((error = git_str_puts( ++ to, "version https://git-lfs.github.com/spec/v1\n")) < 0) { ++ fprintf(stderr, "\n[ERROR] git_str_puts failed\n"); ++ return error; ++ } ++ ++ /* 2) the oid line passed by caller (must end with '\n') */ ++ if ((error = git_str_puts(to, line)) < 0) { ++ fprintf(stderr, "\n[ERROR] git_str_puts 
failed\n"); ++ return error; ++ } ++ ++ if (line[strlen(line) - 1] != '\n') { ++ if ((error = git_str_putc(to, '\n')) < 0) { ++ fprintf(stderr, "\n[ERROR] git_str_putc failed\n"); ++ return error; ++ } ++ } ++ ++ /* 3) size line from the original file size */ ++ if ((error = git_str_printf(to, "size %zu\n", from->size)) < 0) { ++ fprintf(stderr, "\n[ERROR] git_str_printf failed\n"); ++ return error; ++ } ++ ++ return 0; ++} ++ ++/* ++ * lfs_insert_id ++ * -------------- ++ * Parses an LFS pointer file, extracts OID and size, populates payload. ++ * Returns the original input unchanged (content of LFS pointer). ++ * ++ * Parameters: ++ * to - Output buffer. ++ * from - LFS pointer file content. ++ * src - Filter source context. ++ * payload - Output lfs_attrs struct. ++ * ++ * Returns: ++ * 0 on success, negative on error. ++ */ +static int lfs_insert_id( -+ git_str *to, const git_str *from, const git_filter_source *src, void** payload) ++ git_str *to, ++ const git_str *from, ++ const git_filter_source *src, ++ void **payload) +{ + git_str lfs_oid = GIT_STR_INIT; + git_str lfs_size = GIT_STR_INIT; ++ git_str full_path = GIT_STR_INIT; ++ const char *obj_regexp = "\noid sha256:(.*)\n"; ++ const char *size_regexp = "\nsize (.*)\n"; ++ git_repository *repo = git_filter_source_repo(src); ++ const char *path = git_filter_source_path(src); ++ const char *workdir = git_repository_workdir(repo); ++ /* Setup memory for payload struct ownership */ ++ lfs_attrs *la = lfs_attrs_new(); ++ if (!la) ++ return -1; ++ ++ lfs_attrs_set_path(la, path); ++ lfs_attrs_set_workdir(la, workdir); ++ lfs_attrs_set_url(la, repo->url); ++ la->is_download = true; + + lfs_oid.size = from->size; + lfs_oid.asize = from->asize; @@ -425,52 +1159,76 @@ index 000000000..484811a0c + lfs_size.asize = from->asize; + lfs_size.ptr = git__strdup(from->ptr); + -+ const char *obj_regexp = "\noid sha256:(.*)\n"; -+ const char *size_regexp = "\nsize (.*)\n"; -+ if (get_lfs_info_match(&lfs_oid, obj_regexp) < 0) 
++ if (get_lfs_info_match(&lfs_oid, obj_regexp) < 0) { ++ fprintf(stderr, ++ "\n[ERROR] failure, cannot find lfs oid in: %s\n", ++ lfs_oid.ptr); + return -1; -+ -+ if (get_lfs_info_match(&lfs_size, size_regexp) < 0) ++ } ++ lfs_attrs_set_oid(la, lfs_oid.ptr); ++ git_str_dispose(&lfs_oid); ++ if (get_lfs_info_match(&lfs_size, size_regexp) < 0) { ++ fprintf(stderr, ++ "\n[ERROR] failure, cannot find lfs size in: %s\n", ++ lfs_size.ptr); + return -1; -+ -+ git_repository *repo = git_filter_source_repo(src); -+ const char *path = git_filter_source_path(src); -+ -+ git_str full_path = GIT_STR_INIT; -+ if (git_repository_workdir_path(&full_path, repo, path) < 0) ++ } ++ lfs_attrs_set_size(la, lfs_size.ptr); ++ git_str_dispose(&lfs_size); ++ if (git_repository_workdir_path(&full_path, repo, path) < 0) { ++ fprintf(stderr, ++ "\n[ERROR] failure, cannot get repository path: %s\n", ++ path); + return -1; ++ } ++ lfs_attrs_set_full_path(la, full_path.ptr); ++ git_str_dispose(&full_path); + -+ size_t workdir_size = strlen(git_repository_workdir(repo)); -+ -+ const char *workdir = git_repository_workdir(repo); -+ struct lfs_attrs la = { path, full_path.ptr, workdir, lfs_oid.ptr, lfs_size.ptr, repo->url }; -+ -+ *payload = git__malloc(sizeof(la)); -+ GIT_ERROR_CHECK_ALLOC(*payload); -+ memcpy(*payload, &la, sizeof(la)); ++ *payload = la; + -+ /*Just write the oryginal lfs file contents */ ++ /*Just write the original lfs file contents */ + return git_str_set(to, from->ptr, from->size); +} + ++/* ++ * lfs_apply ++ * ---------- ++ * libgit2 filter entrypoint: ++ * - In SMUDGE mode → download (lfs_insert_id). ++ * - In CLEAN mode → create pointer file (lfs_remove_id). 
++ */ +static int lfs_apply( -+ git_filter *self, -+ void **payload, -+ git_str *to, -+ const git_str *from, -+ const git_filter_source *src) ++ git_filter *self, ++ void **payload, ++ git_str *to, ++ const git_str *from, ++ const git_filter_source *src) +{ -+ GIT_UNUSED(self); GIT_UNUSED(payload); ++ GIT_UNUSED(self); ++ GIT_UNUSED(payload); + ++ /* for download of the lfs pointer files */ + if (git_filter_source_mode(src) == GIT_FILTER_SMUDGE) + return lfs_insert_id(to, from, src, payload); -+ /*else -+ * PATH for upload lfs files not needed -+ return lfs_remove_id(to, from); -+ */ ++ else ++ /* for upload or diff of the lfs pointer files */ ++ return lfs_remove_id(to, from, payload); + return 0; +} + ++/* ++ * lfs_check ++ * ---------- ++ * Determines whether a given file path should apply the "lfs" filter. ++ * ++ * Parameters: ++ * src - Filter source. ++ * attr_values - Unused. ++ * ++ * Returns: ++ * 0 if filter applies, ++ * GIT_PASSTHROUGH if not. ++ */ +static int lfs_check( + git_filter *self, + void **payload, /* points to NULL ptr on entry, may be set */ @@ -478,12 +1236,13 @@ index 000000000..484811a0c + const char **attr_values) +{ + const char *value; -+ + git_repository *repo = git_filter_source_repo(src); + const char *path = git_filter_source_path(src); + -+ git_attr_get( -+ &value, repo, GIT_ATTR_CHECK_NO_SYSTEM, path, "filter"); ++ GIT_UNUSED(self); ++ GIT_UNUSED(payload); ++ GIT_UNUSED(attr_values); ++ git_attr_get(&value, repo, GIT_ATTR_CHECK_NO_SYSTEM, path, "filter"); + + if (value && *value) { + if (strcmp(value, "lfs") == 0) { @@ -493,20 +1252,23 @@ index 000000000..484811a0c + return GIT_PASSTHROUGH; + } + -+ GIT_UNUSED(self); -+ + return 0; +} + ++/* ++ * lfs_stream ++ * ----------- ++ * Creates a buffered filter stream wrapper around lfs_apply(). 
++ */ +static int lfs_stream( -+ git_writestream **out, -+ git_filter *self, -+ void **payload, -+ const git_filter_source *src, -+ git_writestream *next) ++ git_writestream **out, ++ git_filter *self, ++ void **payload, ++ const git_filter_source *src, ++ git_writestream *next) +{ -+ return git_filter_buffered_stream_new(out, -+ self, lfs_apply, NULL, payload, src, next); ++ return git_filter_buffered_stream_new( ++ out, self, lfs_apply, NULL, payload, src, next); +} + +struct progress_data { @@ -526,14 +1288,23 @@ index 000000000..484811a0c +}; + +static const char *sizeUnits[] = { "B", "KB", "MB", "GB", "TB", NULL }; ++/* ++ * print_download_speed_info ++ * -------------------------- ++ * Prints download speed in human-readable units. ++ * ++ * Parameters: ++ * received_size - Bytes downloaded. ++ * elapsed_time - Seconds elapsed. ++ */ +static void print_download_speed_info(size_t received_size, size_t elapsed_time) +{ + double recv_len = (double)received_size; + uint64_t elapsed = (uint64_t)elapsed_time; + double rate; -+ rate = elapsed ? recv_len / elapsed : received_size; -+ + size_t rate_unit_idx = 0; ++ ++ rate = elapsed ? recv_len / elapsed : received_size; + while (rate > 1000 && sizeUnits[rate_unit_idx + 1]) { + rate /= 1000.0; + rate_unit_idx++; @@ -542,29 +1313,42 @@ index 000000000..484811a0c + printf(" [%.2f %s/s] ", rate, sizeUnits[rate_unit_idx]); +} + -+void print_progress( -+ size_t count, -+ size_t max, -+ bool first_run, -+ size_t elapsed_time) ++/* ++ * print_progress ++ * --------------- ++ * Renders a progress bar with percentage, size, and transfer speed. 
++ */ ++static void ++print_progress(size_t count, size_t max, bool first_run, size_t elapsed_time) +{ -+ float progress = (float)count / max; ++ double progress; ++ int i, bar_length, bar_width; ++ size_t totalSizeUnitId; ++ double totalSize; ++ if (max == 0) { ++ /* Print received bytes + rate without percentage bar */ ++ printf("\rProgress: [unknown size] "); ++ print_download_speed_info(count, elapsed_time); ++ fflush(stdout); ++ return; ++ } ++ ++ progress = (double)count / max; + if (!first_run && progress < 0.01 && count > 0) + return; + -+ const int bar_width = 50; -+ int bar_length = progress * bar_width; ++ bar_width = 50; ++ bar_length = progress * bar_width; + + printf("\rProgress: ["); -+ int i; + for (i = 0; i < bar_length; ++i) { + printf("#"); + } + for (i = bar_length; i < bar_width; ++i) { + printf(" "); + } -+ size_t totalSizeUnitId = 0; -+ double totalSize = max; ++ totalSizeUnitId = 0; ++ totalSize = max; + while (totalSize > 1000 && sizeUnits[totalSizeUnitId + 1]) { + totalSize /= 1000.0; + totalSizeUnitId++; @@ -577,7 +1361,12 @@ index 000000000..484811a0c + fflush(stdout); +} + -+int progress_callback( ++/* ++ * progress_callback ++ * ------------------ ++ * cURL progress callback wrapper to throttle progress prints. 
++ */ ++static int progress_callback( + void *clientp, + curl_off_t dltotal, + curl_off_t dlnow, @@ -585,12 +1374,16 @@ index 000000000..484811a0c + curl_off_t ulnow) +{ + struct progress_data *pcs = (struct progress_data *)clientp; ++ time_t currentTime = time(NULL); ++ bool shouldPrintDueToTime = false; ++ GIT_UNUSED(ulnow); ++ GIT_UNUSED(ultotal); + if (dlnow == 0) { + pcs->started_download = time(NULL); + pcs->last_print_time = time(NULL); + } -+ time_t currentTime = time(NULL); -+ bool shouldPrintDueToTime = (currentTime - pcs->last_print_time >= 1); ++ ++ shouldPrintDueToTime = (currentTime - pcs->last_print_time >= 1); + if ((dltotal == dlnow) && dltotal < 10000) { + /* Usually with first messages we don't get the full size and we + don't want to print progress bar so we assume that until @@ -598,7 +1391,8 @@ index 000000000..484811a0c + we would print 100% progress bar */ + return 0; + } -+ /* called multiple times, so we want to print progress bar only once reached 100% */ ++ /* called multiple times, so we want to print progress bar only once ++ * reached 100% */ + if (pcs->fullDownloadPrinted) { + return 0; + } @@ -616,22 +1410,36 @@ index 000000000..484811a0c + return 0; +} + -+static size_t file_write_callback(void *buffer, size_t size, size_t nmemb, void *stream) ++/* ++ * file_write_callback ++ * -------------------- ++ * cURL write callback writing received bytes to disk. 
++ */ ++static size_t ++file_write_callback(void *buffer, size_t size, size_t nmemb, void *stream) +{ + struct FtpFile *out = (struct FtpFile *)stream; ++ size_t written_items; + if (!out->stream) { + /* open file for writing */ + out->stream = fopen(out->filename, "wb"); + if (!out->stream) { -+ fprintf(stderr, "failure, cannot open file to write: %s\n", ++ fprintf(stderr, ++ "\n[ERROR] failure, cannot open file to write: %s\n", + out->filename); + return 0; /* failure, cannot open file to write */ + } + } -+ -+ return fwrite(buffer, size, nmemb, out->stream); ++ ++ written_items = fwrite(buffer, size, nmemb, out->stream); ++ return written_items * size; /* return BYTES written */ +} + ++/* ++ * write_callback ++ * --------------- ++ * cURL callback for accumulating HTTP response into memory. ++ */ +static size_t write_callback(void *ptr, size_t size, size_t nmemb, void *userp) +{ + size_t realsize = size * nmemb; @@ -650,7 +1458,12 @@ index 000000000..484811a0c + return realsize; +} + -+static void print_download_info(const char* filename, size_t bytes) ++/* ++ * print_download_info ++ * -------------------- ++ * Prints human-readable file size before downloading. ++ */ ++static void print_download_info(const char *filename, size_t bytes) +{ + double recv_len = (double)bytes; + size_t recv_unit_idx = 0; @@ -659,8 +1472,7 @@ index 000000000..484811a0c + recv_unit_idx++; + } + printf("\nDownloading lfs size: %.2f %s file: %s\n", recv_len, -+ sizeUnits[recv_unit_idx], -+ filename); ++ sizeUnits[recv_unit_idx], filename); +} + +#define CURL_SETOPT(setopt) \ @@ -668,223 +1480,489 @@ index 000000000..484811a0c + status = setopt; \ + } + -+/** -+ * lfs_download - Downloads a file using the LFS (Large File Storage) mechanism. ++/* ++ * curl_resume_url_execute ++ * ------------------------ ++ * Attempts to resume an interrupted download using HTTP Range. + * -+ * This function performs the following steps: -+ * 1. 
Validates the input payload and initializes necessary resources. -+ * 2. Constructs a temporary output file path for the download. -+ * 3. Initializes CURL handles for HTTP requests. -+ * 4. Performs the download and writes the data to the temporary file. -+ * 5. Cleans up resources and handles errors appropriately. ++ * Parameters: ++ * dl_curl - CURL handle. ++ * ftpfile - Target file stream + filename. + * -+ * Error Handling: -+ * - If the payload is NULL, the function logs an error and returns immediately. -+ * - CURL initialization and operations are checked for errors, and appropriate cleanup is performed. -+ * - File operations are validated to ensure successful creation and writing. ++ * Returns: ++ * cURL result code. ++ */ ++static int curl_resume_url_execute(CURL *dl_curl, struct FtpFile *ftpfile) ++{ ++ /* This helper check does not work but the resume mechanism still works ++ curl_off_t resume_from = 0; ++ curl_easy_getinfo( ++ dl_curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD_T, &resume_from); ++ ++ if (resume_from == -1) { ++ fprintf(stderr, ++ "\n[ERROR] curl_easy_perform() failed with transferred a ++ partial file error and server does not support range/resume.\n"); } else ++ { ++ */ ++ CURLcode res; ++ curl_off_t offset = 0; ++ printf("\n[INFO] curl_easy_perform() trying to resume file download\n"); ++ if (ftpfile->stream) { ++ fclose(ftpfile->stream); ++ } ++ ftpfile->stream = fopen(ftpfile->filename, "ab+"); ++ if (ftpfile->stream) { ++ if (fseeko(ftpfile->stream, 0, SEEK_END) == 0) { ++ off_t pos = ftello(ftpfile->stream); ++ if (pos > 0) ++ offset = (curl_off_t)pos; ++ } ++ /* Do not close the file because we want to append binary to the ++ existing file ++ fclose(ftpfile->stream);*/ ++ } else { ++ fprintf(stderr, "\n[ERROR] Cannot open file %s\n", ++ ftpfile->filename); ++ return -1; ++ } ++ ++ /* Tell libcurl to resume */ ++ curl_easy_setopt(dl_curl, CURLOPT_RESUME_FROM_LARGE, offset); ++ /* Perform the request, res gets the return code */ ++ ++ /* 
Perform the request, res gets the return code */ ++ res = curl_easy_perform(dl_curl); ++ ++ /* Validate that server honored Range (206) when offset > 0 */ ++ if (res == CURLE_OK && offset > 0) { ++ long http_code = 0; ++ curl_easy_getinfo(dl_curl, CURLINFO_RESPONSE_CODE, &http_code); ++ if (http_code != 206) { ++ /* Server did not honor resume — caller may choose to ++ * retry or restart */ ++ fprintf(stderr, ++ "\n[WARN] Server did not return 206 for resumed request (HTTP %ld)\n", ++ http_code); ++ } ++ } ++ ++ return res; ++} ++ ++/* ++ * download_with_resume ++ * --------------------- ++ * Retries resuming a download multiple times with delay between attempts. ++ * ++ * Parameters: ++ * dl_curl - CURL download handle. ++ * ftpfile - FtpFile handle. ++ * max_retries - Number of attempts. ++ * interval_seconds - Delay between attempts. + * -+ * @param self: Unused parameter, reserved for future use. -+ * @param payload: Pointer to the lfs_attrs structure containing download parameters. ++ * Returns: ++ * Final attempt's cURL code. 
+ */ -+ static void lfs_download(git_filter *self, void *payload) ++static CURLcode download_with_resume( ++ CURL *dl_curl, ++ struct FtpFile *ftpfile, ++ int max_retries, ++ unsigned int interval_seconds) +{ -+ GIT_UNUSED(self); -+ if (!payload) { -+ fprintf(stderr, "lfs payload not initialized"); -+ return; ++ CURLcode res = CURLE_OK; ++ int attempt; ++ for (attempt = 1; attempt <= max_retries; ++attempt) { ++ res = curl_resume_url_execute(dl_curl, ftpfile); ++ ++ if (res == CURLE_OK) { ++ /* Success */ ++ if (attempt > 1) ++ printf("[INFO] Resume attempt %d succeeded\n", ++ attempt); ++ return CURLE_OK; ++ } ++ ++ fprintf(stderr, "[WARN] Resume attempt %d/%d failed: %s\n", ++ attempt, max_retries, curl_easy_strerror(res)); ++ ++ if (attempt < max_retries) { ++ printf("[INFO] Waiting %u seconds before next resume attempt...\n", ++ interval_seconds); ++ fflush(stdout); ++ sleep_seconds(interval_seconds); ++ } + } -+ struct lfs_attrs *la = (struct lfs_attrs *)payload; -+ char *tmp_out_file = append_char_to_buffer(la->full_path, '2'); + -+ CURL *info_curl,*dl_curl; ++ return res; /* last result (failure) */ ++} ++ ++/* ++ * lfs_download ++ * ------------- ++ * Full LFS download implementation. ++ * Performs: ++ * - batch API request ++ * - parsing download link ++ * - actual file download (with resume support) ++ * - renaming into place ++ * ++ * Parameters: ++ * self - Filter pointer (unused). ++ * payload - Populated lfs_attrs struct with OID, size, path, etc. 
++ */ ++static void lfs_download(git_filter *self, void *payload) ++{ ++ struct lfs_attrs *la = (struct lfs_attrs *)payload; ++ char *tmp_out_file = NULL; ++ CURL *info_curl = NULL; ++ CURL *dl_curl = NULL; + CURLcode res = CURLE_OK; + CURLcode status = CURLE_OK; + git_str res_str = GIT_STR_INIT; ++ git_str lfs_info_url = GIT_STR_INIT; ++ git_str lfs_info_data = GIT_STR_INIT; ++ bool resumingFileByBlobFilter = false; ++ struct progress_data progress_d = { 0 }; ++ struct memory response = { 0 }; ++ struct curl_slist *chunk = NULL; ++ struct FtpFile ftpfile = { 0 }; ++ const char *href_regexp = ++ "\"download\"\\s*:\\s*\\{\\s*\"href\":\"([^\"]+)\""; ++ GIT_UNUSED(self); ++ if (!la) { ++ goto cleanup; ++ } ++ ++ /* Currently only download is supoprted, no lfs file upload */ ++ if (!la->is_download) { ++ goto done; ++ } ++ ++ tmp_out_file = append_cstr_to_buffer(la->full_path, "lfs_part"); ++ if (tmp_out_file == NULL) { ++ fprintf(stderr, "\n[ERROR] lfs create temp filename failed\n"); ++ goto cleanup; ++ } ++ ftpfile.filename = tmp_out_file; ++ + /* get a curl handle */ + info_curl = curl_easy_init(); -+ if (info_curl) { -+ struct curl_slist *chunk = NULL; -+ git_str lfs_info_url = GIT_STR_INIT; -+ if (git_str_join( -+ &lfs_info_url, '.', -+ la->url, -+ "git/info/lfs/objects/batch") < 0) { -+ fprintf(stderr, "failed to create url '%s'", -+ la->full_path); -+ goto on_error; -+ } ++ if (!info_curl) { ++ fprintf(stderr, "[ERROR] curl_easy_init(info_curl) failed\n"); ++ goto cleanup; ++ } + -+ /* Remove a header curl would otherwise add by itself */ -+ chunk = curl_slist_append(chunk, "Accept: application/vnd.git-lfs+json"); -+ /* Add a custom header */ -+ chunk = curl_slist_append(chunk, "Content-Type: application/vnd.git-lfs+json"); -+ /* set our custom set of headers */ -+ CURL_SETOPT(curl_easy_setopt(info_curl, CURLOPT_HTTPHEADER, chunk)); -+ /* First set the URL that is about to receive our POST. 
This URL -+ can just as well be an https:// URL if that is what should -+ receive the data. */ -+ CURL_SETOPT(curl_easy_setopt(info_curl, CURLOPT_URL, lfs_info_url.ptr)); -+ -+ if (status != CURLE_OK) { -+ fprintf(stderr, "curl_easy_setopt() failed: %s\n", curl_easy_strerror(status)); -+ goto info_cleaup; -+ } -+ git_str lfs_info_data = GIT_STR_INIT; -+ -+ /* "{\"operation\":\"download\",\"transfer\":[\"basic\"],\"objects\":[{\"oid\":\"9556d0a12310629e217450ac4198c49f5457f1a69e22ce7c9f8e81fab4d530a7\",\"size\":499723}]}" */ -+ if (git_str_join_n( -+ &lfs_info_data, '"',5, -+ "{\"operation\":\"download\",\"transfer\":[\"basic\"],\"objects\":[{\"oid\":", -+ la->lfs_oid, -+ ",\"size\":", -+ la->lfs_size, -+ "}]}" ) < 0) { -+ fprintf(stderr, "failed to create url '%s'", -+ la->full_path); -+ /* always cleanup */ -+ curl_easy_cleanup(info_curl); -+ /* free the custom headers */ -+ curl_slist_free_all(chunk); -+ goto on_error; -+ } ++ if (git_str_join( ++ &lfs_info_url, '.', la->url, ++ "git/info/lfs/objects/batch") < 0) { ++ fprintf(stderr, "\n[ERROR] failed to create url '%s'\n", ++ la->full_path); ++ goto cleanup; ++ } + -+ /* Now specify the POST data */ -+ CURL_SETOPT(curl_easy_setopt(info_curl, CURLOPT_POSTFIELDS, lfs_info_data.ptr)); -+ CURL_SETOPT(curl_easy_setopt(info_curl, CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA)); -+ CURL_SETOPT(curl_easy_setopt(info_curl, CURLOPT_FOLLOWLOCATION, 1L)); ++ /* Remove a header curl would otherwise add by itself */ ++ chunk = curl_slist_append( ++ chunk, "Accept: application/vnd.git-lfs+json"); ++ /* Add a custom header */ ++ chunk = curl_slist_append( ++ chunk, "Content-Type: application/vnd.git-lfs+json"); ++ /* set our custom set of headers */ ++ CURL_SETOPT( ++ curl_easy_setopt(info_curl, CURLOPT_HTTPHEADER, chunk)); ++ /* First set the URL that is about to receive our POST. This URL ++ can just as well be an https:// URL if that is what should ++ receive the data. 
*/ ++ CURL_SETOPT(curl_easy_setopt( ++ info_curl, CURLOPT_URL, lfs_info_url.ptr)); ++ /* Add cURL resiliency */ ++ /* unlimited data */ ++ CURL_SETOPT(curl_easy_setopt( ++ info_curl, CURLOPT_CONNECTTIMEOUT, 30L)); ++ /* timeout */ ++ CURL_SETOPT(curl_easy_setopt(info_curl, CURLOPT_TIMEOUT, 0L)); ++ /* low speed 1KB/s */ ++ CURL_SETOPT(curl_easy_setopt( ++ info_curl, CURLOPT_LOW_SPEED_LIMIT, 1024L)); ++ /* for 30s */ ++ CURL_SETOPT(curl_easy_setopt( ++ info_curl, CURLOPT_LOW_SPEED_TIME, 30L)); ++ ++ if (status != CURLE_OK) { ++ fprintf(stderr, ++ "\n[ERROR] curl_easy_setopt() failed: %s\n", ++ curl_easy_strerror(status)); ++ goto cleanup; ++ } + -+ struct memory response = { 0 }; -+ CURL_SETOPT(curl_easy_setopt(info_curl, CURLOPT_WRITEFUNCTION, write_callback)); -+ CURL_SETOPT(curl_easy_setopt(info_curl, CURLOPT_USERAGENT,"git-lfs/3.5.0")); -+ CURL_SETOPT(curl_easy_setopt(info_curl, CURLOPT_WRITEDATA, (void *)&response)); ++ /* "{\"operation\":\"download\",\"transfer\":[\"basic\"],\"objects\":[{\"oid\":\"9556d0a12310629e217450ac4198c49f5457f1a69e22ce7c9f8e81fab4d530a7\",\"size\":499723}]}" ++ */ ++ if (git_str_join_n( ++ &lfs_info_data, '"', 5, ++ "{\"operation\":\"download\",\"transfer\":[\"basic\"],\"objects\":[{\"oid\":", ++ la->lfs_oid, ",\"size\":", la->lfs_size, ++ "}]}") < 0) { ++ fprintf(stderr, "\n[ERROR] failed to create url '%s'\n", ++ la->full_path); ++ goto cleanup; ++ } + -+ if (status != CURLE_OK) { -+ fprintf(stderr, "curl_easy_setopt() failed: %s\n", curl_easy_strerror(status)); -+ goto info_cleaup; -+ } -+ /* Perform the request, res gets the return code */ -+ res = curl_easy_perform(info_curl); -+ /* Check for errors */ -+ if (res != CURLE_OK) { -+ fprintf(stderr, "curl_easy_perform() failed: %s\n", -+ curl_easy_strerror(res)); -+ /* always cleanup */ -+ curl_easy_cleanup(info_curl); -+ /* free the custom headers */ -+ curl_slist_free_all(chunk); -+ goto on_error; -+ } ++ /* Now specify the POST data */ ++ CURL_SETOPT(curl_easy_setopt( ++ 
info_curl, CURLOPT_POSTFIELDS, lfs_info_data.ptr)); ++ CURL_SETOPT(curl_easy_setopt( ++ info_curl, CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA)); ++ CURL_SETOPT(curl_easy_setopt( ++ info_curl, CURLOPT_FOLLOWLOCATION, 1L)); ++ ++ CURL_SETOPT(curl_easy_setopt( ++ info_curl, CURLOPT_WRITEFUNCTION, write_callback)); ++ CURL_SETOPT(curl_easy_setopt( ++ info_curl, CURLOPT_USERAGENT, "git-lfs/3.5.0")); ++ CURL_SETOPT(curl_easy_setopt( ++ info_curl, CURLOPT_WRITEDATA, (void *)&response)); ++ ++ if (status != CURLE_OK) { ++ fprintf(stderr, ++ "\n[ERROR] curl_easy_setopt() failed: %s\n", ++ curl_easy_strerror(status)); ++ goto cleanup; ++ } ++ /* Perform the request, res gets the return code */ ++ res = curl_easy_perform(info_curl); ++ /* Check for errors */ ++ if (res != CURLE_OK) { ++ fprintf(stderr, ++ "\n[ERROR] curl_easy_perform() failed: %s\n", ++ curl_easy_strerror(res)); ++ goto cleanup; ++ } + -+ /* Get response data */ -+ res_str.asize = response.size; -+ res_str.size = response.size; -+ res_str.ptr = git__strdup(response.response); -+ info_cleaup: -+ /* always cleanup */ -+ curl_easy_cleanup(info_curl); -+ /* free the custom headers */ -+ curl_slist_free_all(chunk); -+ if (status != CURLE_OK) -+ goto on_error; ++ /* Copy response JSON */ ++ if (response.response) { ++ git_str_set(&res_str, response.response, response.size); + } + + /* get a curl handle */ -+ dl_curl = curl_easy_init(); -+ if (dl_curl) { -+ struct FtpFile ftpfile = { tmp_out_file, NULL }; -+ -+ const char *href_regexp = "\"download\"\\s*:\\s*\\{\\s*\"href\":\"([^\"]+)\""; -+ if (get_lfs_info_match(&res_str, href_regexp) < 0) { -+ /* always cleanup */ -+ curl_easy_cleanup(dl_curl); -+ goto on_error; -+ } -+ /* First set the URL that is about to receive our POST. This URL -+ can just as well be an https:// URL if that is what should -+ receive the data. 
*/ -+ CURL_SETOPT(curl_easy_setopt(dl_curl, CURLOPT_URL, res_str.ptr)); -+ CURL_SETOPT(curl_easy_setopt(dl_curl, CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA)); -+ CURL_SETOPT(curl_easy_setopt(dl_curl, CURLOPT_FOLLOWLOCATION, 1L)); -+ CURL_SETOPT(curl_easy_setopt(dl_curl, CURLOPT_USE_SSL, CURLUSESSL_ALL)); -+ CURL_SETOPT(curl_easy_setopt(dl_curl, CURLOPT_USERAGENT,"git-lfs/3.5.0")); -+ CURL_SETOPT(curl_easy_setopt(dl_curl, CURLOPT_WRITEFUNCTION, file_write_callback)); -+ CURL_SETOPT(curl_easy_setopt(dl_curl, CURLOPT_WRITEDATA, (void *)&ftpfile)); -+ -+ /* progress bar options */ -+ struct progress_data progress_d = { time(NULL), time(NULL) , false }; -+ CURL_SETOPT(curl_easy_setopt(dl_curl, CURLOPT_NOPROGRESS, 0L)); -+ CURL_SETOPT(curl_easy_setopt(dl_curl, CURLOPT_XFERINFOFUNCTION, progress_callback)); -+ CURL_SETOPT(curl_easy_setopt(dl_curl, CURLOPT_XFERINFODATA, &progress_d)); -+ -+ if (status != CURLE_OK) { -+ fprintf(stderr, "curl_easy_setopt() failed: %s\n", curl_easy_strerror(status)); -+ curl_easy_cleanup(dl_curl); -+ goto on_error; -+ } -+ print_download_info(la->full_path, get_digit(la->lfs_size)); ++ dl_curl = curl_easy_init(); ++ if (!dl_curl) { ++ fprintf(stderr, "[ERROR] curl_easy_init(dl_curl) failed\n"); ++ goto cleanup; ++ } ++ ++ if (get_lfs_info_match(&res_str, href_regexp) < 0) { ++ fprintf(stderr, "[ERROR] Cannot extract LFS download URL\n"); ++ goto cleanup; ++ } ++ /* Progress info */ ++ progress_d.started_download = time(NULL); ++ progress_d.last_print_time = time(NULL); ++ /* First set the URL that is about to receive our POST. This URL ++ can just as well be an https:// URL if that is what should ++ receive the data. 
*/ ++ CURL_SETOPT( ++ curl_easy_setopt(dl_curl, CURLOPT_URL, res_str.ptr)); ++ CURL_SETOPT(curl_easy_setopt( ++ dl_curl, CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA)); ++ CURL_SETOPT( ++ curl_easy_setopt(dl_curl, CURLOPT_FOLLOWLOCATION, 1L)); ++ CURL_SETOPT(curl_easy_setopt( ++ dl_curl, CURLOPT_USE_SSL, CURLUSESSL_ALL)); ++ CURL_SETOPT(curl_easy_setopt( ++ dl_curl, CURLOPT_USERAGENT, "git-lfs/3.5.0")); ++ CURL_SETOPT(curl_easy_setopt( ++ dl_curl, CURLOPT_WRITEFUNCTION, file_write_callback)); ++ CURL_SETOPT(curl_easy_setopt( ++ dl_curl, CURLOPT_WRITEDATA, (void *)&ftpfile)); ++ ++ /* progress bar options */ ++ CURL_SETOPT(curl_easy_setopt(dl_curl, CURLOPT_NOPROGRESS, 0L)); ++ CURL_SETOPT(curl_easy_setopt( ++ dl_curl, CURLOPT_XFERINFOFUNCTION, progress_callback)); ++ CURL_SETOPT(curl_easy_setopt( ++ dl_curl, CURLOPT_XFERINFODATA, &progress_d)); ++ ++ /* Add cURL resiliency */ ++ /* unlimited data */ ++ CURL_SETOPT( ++ curl_easy_setopt(dl_curl, CURLOPT_CONNECTTIMEOUT, 30L)); ++ /* timeout */ ++ CURL_SETOPT(curl_easy_setopt(dl_curl, CURLOPT_TIMEOUT, 0L)); ++ /* low speed 1KB/s */ ++ CURL_SETOPT(curl_easy_setopt( ++ dl_curl, CURLOPT_LOW_SPEED_LIMIT, 1024L)); ++ /* for 30s */ ++ CURL_SETOPT( ++ curl_easy_setopt(dl_curl, CURLOPT_LOW_SPEED_TIME, 30L)); ++ if (status != CURLE_OK) { ++ fprintf(stderr, ++ "\n[ERROR] curl_easy_setopt() failed: %s\n", ++ curl_easy_strerror(status)); ++ goto cleanup; ++ } ++ ++ /* Check for resume if previous download failed and we have the ++ * partial file on disk */ ++ ftpfile.stream = fopen(ftpfile.filename, "r"); ++ if (ftpfile.stream != NULL) { ++ resumingFileByBlobFilter = true; ++ fclose(ftpfile.stream); ++ ftpfile.stream = NULL; ++ ++ /* First try a resume sequence */ ++ res = download_with_resume( ++ dl_curl, &ftpfile, g_lfs_resume_attempts, ++ g_lfs_resume_interval_secs); ++ } else { ++ print_download_info( ++ la->full_path, get_digit(la->lfs_size)); + /* Perform the request, res gets the return code */ + res = 
curl_easy_perform(dl_curl); -+ /* Check for errors */ -+ if (res != CURLE_OK) { -+ fprintf(stderr, "curl_easy_perform() failed: %s\n", -+ curl_easy_strerror(res)); -+ if (ftpfile.stream) -+ fclose(ftpfile.stream); -+ /* always cleanup */ -+ curl_easy_cleanup(dl_curl); -+ goto on_error; -+ } ++ } + -+ if (ftpfile.stream) ++ /* Check for resume of partial download error */ ++ if (res == CURLE_PARTIAL_FILE) { ++ fprintf(stderr, ++ "[WARN] Got CURLE_PARTIAL_FILE, attempting resume sequence\n"); ++ res = download_with_resume( ++ dl_curl, &ftpfile, g_lfs_resume_attempts, ++ g_lfs_resume_interval_secs); ++ } ++ ++ /* Check for errors */ ++ if (res != CURLE_OK) { ++ fprintf(stderr, ++ "\n[ERROR] curl_easy_perform() failed: %s\n", ++ curl_easy_strerror(res)); ++ /* Very important to close the file to write any bytes downloaded */ ++ if (ftpfile.stream) { + fclose(ftpfile.stream); -+ /* always cleanup */ -+ curl_easy_cleanup(dl_curl); ++ ftpfile.stream = NULL; ++ } ++ goto cleanup; + } + -+ /* Remove lfs file and rename downloaded file to oryginal lfs filename */ -+ if (p_unlink(la->full_path) < 0) { -+ fprintf(stderr, "failed to delete file '%s'", la->full_path); -+ goto on_error; ++ /* Very important to close the file to write any bytes downloaded */ ++ if (ftpfile.stream) { ++ fclose(ftpfile.stream); ++ ftpfile.stream = NULL; ++ } ++ ++ /* Remove lfs file and rename downloaded file to original lfs filename */ ++ if (!resumingFileByBlobFilter) { ++ /* File does not exist when using blob filters */ ++ if (p_unlink(la->full_path) < 0) { ++ fprintf(stderr, ++ "\n[ERROR] failed to delete file '%s'\n", ++ la->full_path); ++ /* Ignore error here, react on next error */ ++ } + } + + if (p_rename(tmp_out_file, la->full_path) < 0) { -+ fprintf(stderr, "failed to rename file to '%s'", la->full_path); -+ goto on_error; ++ fprintf(stderr, "\n[ERROR] failed to rename file to '%s'\n", ++ la->full_path); ++ goto cleanup; ++ } ++ ++ /* ++ * SUCCESS ++ */ ++ goto done; ++ /* ++ * 
---------------------------------------------------- ++ * Cleanup block — ALWAYS EXECUTED ++ * ---------------------------------------------------- ++ */ ++cleanup: ++ fprintf(stderr, "[ERROR] LFS download failed for %s\n", ++ la ? la->full_path : "(null)"); ++done: ++ /* Close stream if open */ ++ if (ftpfile.stream) { ++ fclose(ftpfile.stream); ++ ftpfile.stream = NULL; + } -+ git__free(payload); -+ return; + -+on_error: -+ git__free(payload); -+ fprintf(stderr, "LFS download failed for file %s\n", la->full_path); -+ return; ++ /* Free temporary file name */ ++ free(tmp_out_file); ++ /* Libgit2 strings */ ++ git_str_dispose(&lfs_info_url); ++ git_str_dispose(&lfs_info_data); ++ git_str_dispose(&res_str); ++ /* Free memory buffer for batch response */ ++ free(response.response); ++ /* cURL cleanup */ ++ if (info_curl) ++ curl_easy_cleanup(info_curl); ++ if (dl_curl) ++ curl_easy_cleanup(dl_curl); ++ if (chunk) ++ curl_slist_free_all(chunk); ++ ++ /* Free payload */ ++ if (la) ++ lfs_attrs_delete(la); ++ ++ fflush(stdout); ++ fflush(stderr); +} + -+void git_lfs_filter_free(git_filter *filter) ++/* ++ * git_lfs_filter_free ++ * -------------------- ++ * Frees filter instance and performs CURL cleanup. ++ */ ++static void git_lfs_filter_free(git_filter *filter) +{ + curl_global_cleanup(); + git__free(filter); +} + ++/* ++ * lfs_resume_env_init_once ++ * ------------------------- ++ * Runs environment configuration initializer exactly once per process. ++ */ ++static void lfs_resume_env_init_once(void) ++{ ++#ifdef _WIN32 ++ InitOnceExecuteOnce(&lfs_once, lfs_once_cb_win, NULL, NULL); ++#else ++ pthread_once(&lfs_once, lfs_once_cb_posix); ++#endif ++} ++ ++/* ++ * git_lfs_filter_new ++ * ------------------- ++ * Creates and initializes the LFS filter struct used by libgit2. ++ * ++ * Returns: ++ * Pointer to new git_filter struct, or NULL on allocation error. 
++ */ +git_filter *git_lfs_filter_new(void) +{ -+ /* In Windows, this inits the Winsock stuff */ -+ curl_global_init(CURL_GLOBAL_ALL); + git_filter *f = git__calloc(1, sizeof(git_filter)); + if (f == NULL) + return NULL; + ++ /* In Windows, this inits the Winsock stuff */ ++ curl_global_init(CURL_GLOBAL_ALL); ++ /* Initialize env-config exactly once per process */ ++ lfs_resume_env_init_once(); ++ + f->version = GIT_FILTER_VERSION; + f->attributes = "lfs"; + f->shutdown = git_lfs_filter_free; -+ f->stream = lfs_stream; ++ f->stream = lfs_stream; + f->check = lfs_check; + f->cleanup = lfs_download; + + return f; +} diff --git a/src/libgit2/repository.c b/src/libgit2/repository.c -index 73876424a..6c267bc98 100644 +index 73876424a..f374d7f51 100644 --- a/src/libgit2/repository.c +++ b/src/libgit2/repository.c @@ -190,6 +190,7 @@ void git_repository_free(git_repository *repo) @@ -895,6 +1973,23 @@ index 73876424a..6c267bc98 100644 git__memzero(repo, sizeof(*repo)); git__free(repo); +@@ -1104,6 +1105,16 @@ static int repo_is_worktree(unsigned *out, const git_repository *repo) + return error; + } + ++int git_repository_set_url( ++ git_repository *repo, ++ const char *url) ++{ ++ GIT_ASSERT_ARG(repo); ++ GIT_ASSERT_ARG(url); ++ repo->url = git__strdup(url); ++ return 0; ++} ++ + int git_repository_open_ext( + git_repository **repo_ptr, + const char *start_path, diff --git a/src/libgit2/repository.h b/src/libgit2/repository.h index fbf143894..1890c61c1 100644 --- a/src/libgit2/repository.h diff --git a/third_party/libgit2/libgit2_engine.bzl b/third_party/libgit2/libgit2_engine.bzl index 605eb0edca..b4713bce57 100644 --- a/third_party/libgit2/libgit2_engine.bzl +++ b/third_party/libgit2/libgit2_engine.bzl @@ -51,6 +51,7 @@ def _impl(repository_ctx): out_static = "out_interface_libs = [\"{lib_name}.lib\"],".format(lib_name=lib_name) out_libs = "out_shared_libs = [\"{lib_name}.dll\"],".format(lib_name=lib_name) cache_entries = """ + "EXPERIMENTAL_SHA256": "ON", 
"CMAKE_POSITION_INDEPENDENT_CODE": "ON", "CMAKE_CXX_FLAGS": " /guard:cf /GS -s -D_GLIBCXX_USE_CXX11_ABI=1", "CMAKE_LIBRARY_OUTPUT_DIRECTORY": "Debug", @@ -66,6 +67,7 @@ def _impl(repository_ctx): out_static = "" out_libs = "out_shared_libs = [\"{lib_name}.so\"],".format(lib_name=lib_name) cache_entries = """ + "EXPERIMENTAL_SHA256": "ON", "CMAKE_POSITION_INDEPENDENT_CODE": "ON", "CMAKE_CXX_FLAGS": " /guard:cf -s -D_GLIBCXX_USE_CXX11_ABI=1 -Wno-error=deprecated-declarations -Wuninitialized", "CMAKE_ARCHIVE_OUTPUT_DIRECTORY": "lib",