From c417af469f9aa3da8dfef78f996c0fb8c5d1f4c2 Mon Sep 17 00:00:00 2001 From: "GPT 5.5" Date: Wed, 29 Apr 2026 05:47:57 +0800 Subject: [PATCH 1/6] reject control chars in written values in configuration Reject CR, LF, and NUL in GitConfigParser values before writing them to git config files (this is also a deviation from Git, which escapes them). GitConfigParser._write() serializes embedded newlines as indented continuation lines by replacing "\n" with "\n\t". Git itself skips leading whitespace before parsing config tokens, so an injected value such as "foo\n[core]\nhooksPath=/tmp/hooks" is written in a form where the indented "[core]" line is still parsed by Git as a real section header. This lets attacker-controlled input passed to config_writer().set_value() poison repository config, including core.hooksPath, and redirect hook execution for later Git operations. Fail closed instead of stripping or normalizing these characters. Silent normalization can hide unsanitized caller input, and GitPython does not currently round-trip Git-style escaped values such as "\n" as embedded newlines. Apply the validation to set_value(), add_value(), and the public set() path so callers cannot bypass the safer helper API. Add regression tests for the advisory payload and for CR, LF, NUL, and bytes values. This preserves existing read behavior for config files that already contain multiline values while preventing GitPython from writing new unsafe values. 
Co-authored-by: Sebastian Thiel --- git/config.py | 24 ++++++++++++++++++++++-- test/test_config.py | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 2 deletions(-) diff --git a/git/config.py b/git/config.py index c6eaf8f7b..31d9e01cd 100644 --- a/git/config.py +++ b/git/config.py @@ -882,6 +882,24 @@ def _value_to_string(self, value: Union[str, bytes, int, float, bool]) -> str: return str(value) return force_text(value) + def _value_to_string_safe(self, value: Union[str, bytes, int, float, bool]) -> str: + value_str = self._value_to_string(value) + if re.search(r"[\r\n\x00]", value_str): + raise ValueError("Git config values must not contain CR, LF, or NUL") + return value_str + + @needs_values + @set_dirty_and_flush_changes + def set( + self, + section: str, + option: str, + value: Union[str, bytes, int, float, bool, None] = None, + ) -> None: + if value is not None: + value = self._value_to_string_safe(value) + return super().set(section, option, value) + @needs_values @set_dirty_and_flush_changes def set_value(self, section: str, option: str, value: Union[str, bytes, int, float, bool]) -> "GitConfigParser": @@ -902,9 +920,10 @@ def set_value(self, section: str, option: str, value: Union[str, bytes, int, flo :return: This instance """ + value_str = self._value_to_string_safe(value) if not self.has_section(section): self.add_section(section) - self.set(section, option, self._value_to_string(value)) + self.set(section, option, value_str) return self @needs_values @@ -929,9 +948,10 @@ def add_value(self, section: str, option: str, value: Union[str, bytes, int, flo :return: This instance """ + value_str = self._value_to_string_safe(value) if not self.has_section(section): self.add_section(section) - self._sections[section].add(option, self._value_to_string(value)) + self._sections[section].add(option, value_str) return self def rename_section(self, section: str, new_name: str) -> "GitConfigParser": diff --git a/test/test_config.py 
b/test/test_config.py index 11ea52d16..a9dcdb087 100644 --- a/test/test_config.py +++ b/test/test_config.py @@ -150,6 +150,39 @@ def test_config_value_with_trailing_new_line(self): git_config = GitConfigParser(config_file) git_config.read() # This should not throw an exception + @with_rw_directory + def test_set_value_rejects_config_injection(self, rw_dir): + config_path = osp.join(rw_dir, "config") + payload = "foo\n[core]\nhooksPath=/tmp/hooks" + + with GitConfigParser(config_path, read_only=False) as git_config: + with pytest.raises(ValueError, match="CR, LF, or NUL"): + git_config.set_value("user", "name", payload) + + with GitConfigParser(config_path, read_only=True) as git_config: + self.assertFalse(git_config.has_section("user")) + self.assertFalse(git_config.has_section("core")) + + @with_rw_directory + def test_set_and_add_value_reject_unsafe_value_characters(self, rw_dir): + config_path = osp.join(rw_dir, "config") + bad_values = ("foo\rbar", "foo\nbar", "foo\x00bar", b"foo\nbar") + + with GitConfigParser(config_path, read_only=False) as git_config: + git_config.add_section("user") + for bad_value in bad_values: + with pytest.raises(ValueError, match="CR, LF, or NUL"): + git_config.set("user", "name", bad_value) + with pytest.raises(ValueError, match="CR, LF, or NUL"): + git_config.set_value("user", "name", bad_value) + with pytest.raises(ValueError, match="CR, LF, or NUL"): + git_config.add_value("user", "name", bad_value) + + git_config.set_value("user", "name", "safe") + + with GitConfigParser(config_path, read_only=True) as git_config: + self.assertEqual(git_config.get_value("user", "name"), "safe") + def test_base(self): path_repo = fixture_path("git_config") path_global = fixture_path("git_config_global") From 8e24503b42c1d63dd98e8b2e6a2f655bdd0821e3 Mon Sep 17 00:00:00 2001 From: "GPT 5.5" Date: Wed, 29 Apr 2026 06:39:02 +0800 Subject: [PATCH 2/6] avoid duplicate validation in set_value Co-authored-by: Sebastian Thiel --- git/config.py | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/git/config.py b/git/config.py index 31d9e01cd..97ae054e5 100644 --- a/git/config.py +++ b/git/config.py @@ -923,7 +923,7 @@ def set_value(self, section: str, option: str, value: Union[str, bytes, int, flo value_str = self._value_to_string_safe(value) if not self.has_section(section): self.add_section(section) - self.set(section, option, value_str) + super().set(section, option, value_str) return self @needs_values From d7ce6fc19199cf8698d722c7d8ae38ff81424fba Mon Sep 17 00:00:00 2001 From: "GPT 5.5" Date: Tue, 28 Apr 2026 21:47:27 +0000 Subject: [PATCH 3/6] Improve pure Python rev-parse coverage and behavior (#2135) Port object-resolving revspec cases inspired by gix-revision into deterministic GitPython tests, without shelling out to Git or Gix at runtime. Refactor rev_parse handling around anchors, navigation, peeling, reflog selectors, path/index lookups, describe-style names, and commit-message searches. Document observed Git/Gix behavior differences and the GitPython choices made for user-facing compatibility. 
Co-authored-by: Sebastian Thiel --- git/repo/fun.py | 513 ++++++++++++++++++++++++++++++----------- test/test_repo.py | 17 ++ test/test_rev_parse.py | 138 +++++++++++ 3 files changed, 536 insertions(+), 132 deletions(-) create mode 100644 test/test_rev_parse.py diff --git a/git/repo/fun.py b/git/repo/fun.py index 3f00e60ea..d91ce5c0b 100644 --- a/git/repo/fun.py +++ b/git/repo/fun.py @@ -20,6 +20,7 @@ import os import os.path as osp from pathlib import Path +import re import stat from string import digits @@ -28,12 +29,13 @@ from git.cmd import Git from git.exc import WorkTreeRepositoryUnsupported from git.objects import Object +from git.objects.util import parse_date from git.refs import SymbolicReference from git.util import cygpath, bin_to_hex, hex_to_bin # Typing ---------------------------------------------------------------------- -from typing import Optional, TYPE_CHECKING, Union, cast, overload +from typing import Optional, TYPE_CHECKING, Tuple, Union, cast, overload from git.types import AnyGitObject, Literal, PathLike @@ -41,6 +43,7 @@ from git.db import GitCmdObjectDB from git.objects import Commit, TagObject from git.refs.reference import Reference + from git.refs.log import RefLog, RefLogEntry from git.refs.tag import Tag from .base import Repo @@ -139,6 +142,23 @@ def short_to_long(odb: "GitCmdObjectDB", hexsha: str) -> Optional[bytes]: # END exception handling +def _describe_to_long(repo: "Repo", name: str) -> Optional[bytes]: + """Resolve git-describe style names to the abbreviated object they contain.""" + match = re.match(r"^.+-\d+-g([0-9A-Fa-f]{4,40})(?:-dirty)?$", name) + if match is None: + match = re.match(r"^.+-g([0-9A-Fa-f]{4,40})(?:-dirty)?$", name) + if match is None: + match = re.match(r"^([0-9A-Fa-f]{4,40})-dirty$", name) + if match is None: + return None + # END handle match + + hexsha = match.group(1) + if len(hexsha) == 40: + return hexsha.encode("ascii") + return short_to_long(repo.odb, hexsha) + + @overload def name_to_object(repo: 
"Repo", name: str, return_ref: Literal[False] = ...) -> AnyGitObject: ... @@ -170,6 +190,10 @@ def name_to_object(repo: "Repo", name: str, return_ref: bool = False) -> Union[A # END handle short shas # END find sha if it matches + if hexsha is None: + hexsha = _describe_to_long(repo, name) + # END handle describe output + # If we couldn't find an object for what seemed to be a short hexsha, try to find it # as reference anyway, it could be named 'aaa' for instance. if hexsha is None: @@ -227,6 +251,298 @@ def to_commit(obj: Object) -> "Commit": return obj +def _object_from_hexsha(repo: "Repo", hexsha: str) -> AnyGitObject: + return Object.new_from_sha(repo, hex_to_bin(hexsha)) + + +def _current_reflog_ref(repo: "Repo") -> SymbolicReference: + return repo.head + + +def _ref_log(repo: "Repo", ref: SymbolicReference) -> "RefLog": + try: + return ref.log() + except FileNotFoundError: + try: + if ref.path == repo.head.ref.path: + return repo.head.log() + # END handle linked-worktree current branch logs + except TypeError: + pass + # END handle detached head + raise + # END handle missing branch log + + +def _ref_log_entry(repo: "Repo", ref: SymbolicReference, index: int) -> "RefLogEntry": + try: + return ref.log_entry(index) + except FileNotFoundError: + try: + if ref.path == repo.head.ref.path: + return repo.head.log_entry(index) + # END handle linked-worktree current branch logs + except TypeError: + pass + # END handle detached head + raise + # END handle missing branch log + + +def _find_reflog_entry_by_date(repo: "Repo", ref: SymbolicReference, spec: str) -> str: + try: + timestamp, _offset = parse_date(spec) + except ValueError as e: + raise NotImplementedError("Support for additional @{...} modes not implemented") from e + # END handle unsupported dates + log = _ref_log(repo, ref) + if not log: + raise IndexError("Invalid revlog date: %s" % spec) + # END handle empty log + + for entry in reversed(log): + if entry.time[0] <= timestamp: + return entry.newhexsha + # 
END found candidate + # END for each entry + return log[0].newhexsha + + +def _previous_checked_out_branch(repo: "Repo", nth: int) -> AnyGitObject: + if nth <= 0: + raise ValueError("Invalid previous checkout selector: -%i" % nth) + # END handle invalid input + + seen = 0 + for entry in reversed(_ref_log(repo, repo.head)): + message = entry.message or "" + prefix = "checkout: moving from " + if not message.startswith(prefix): + continue + # END skip non-checkouts + + previous_branch = message[len(prefix) :].split(" to ", 1)[0] + seen += 1 + if seen == nth: + return name_to_object(repo, previous_branch) + # END found selector + # END for each entry + raise IndexError("Invalid previous checkout selector: -%i" % nth) + + +def _tracking_branch_object(repo: "Repo", ref: Optional[SymbolicReference]) -> AnyGitObject: + from git.refs.head import Head + + if ref is None: + try: + head = repo.active_branch + except TypeError as e: + raise BadName("@{upstream}") from e + elif isinstance(ref, Head): + head = ref + else: + raise BadName("%s@{upstream}" % ref.name) + # END handle head + + tracking_branch = head.tracking_branch() + if tracking_branch is None: + raise BadName("%s@{upstream}" % head.name) + # END handle missing upstream + return tracking_branch.commit + + +def _apply_reflog(repo: "Repo", ref: Optional[SymbolicReference], content: str) -> AnyGitObject: + if content.startswith("+"): + content = content[1:] + # END handle explicit positive sign + + if content.startswith("-"): + if ref is not None: + raise ValueError("Previous checkout selectors do not take an explicit ref") + if content == "-0": + raise ValueError("Negative zero is invalid in reflog selector") + # END handle invalid negative zero + try: + return _previous_checked_out_branch(repo, int(content[1:])) + except ValueError as e: + raise ValueError("Invalid previous checkout selector: %s" % content) from e + # END handle previous checkout branch + + content_lower = content.lower() + if content_lower in ("u", 
"upstream", "push"): + return _tracking_branch_object(repo, ref) + # END handle sibling branches + + ref = ref or _current_reflog_ref(repo) + try: + entry_no = int(content) + except ValueError: + hexsha = _find_reflog_entry_by_date(repo, ref, content) + else: + if entry_no >= 100000000: + hexsha = _find_reflog_entry_by_date(repo, ref, "%s +0000" % entry_no) + elif entry_no == 0: + return ref.commit + else: + try: + entry = _ref_log_entry(repo, ref, -(entry_no + 1)) + except IndexError as e: + raise IndexError("Invalid revlog index: %i" % entry_no) from e + # END handle index out of bound + hexsha = entry.newhexsha + # END handle offset or date-like timestamp + # END handle content + return _object_from_hexsha(repo, hexsha) + + +def _find_closing_brace(rev: str, start: int) -> int: + depth = 1 + escaped = False + for idx in range(start + 1, len(rev)): + char = rev[idx] + if escaped: + escaped = False + elif char == "\\": + escaped = True + elif char == "{": + depth += 1 + elif char == "}": + depth -= 1 + if depth == 0: + return idx + # END found end + # END handle char + # END for each char + raise ValueError("Missing closing brace to define type in %s" % rev) + + +def _parse_search(pattern: str) -> Tuple[str, bool]: + if not pattern: + raise ValueError("Revision search requires a pattern") + # END handle empty pattern + + if pattern.startswith("!-"): + return pattern[2:], True + if pattern.startswith("!!"): + return pattern[1:], False + if pattern.startswith("!"): + raise ValueError("Need one character after /!, typically -") + return pattern, False + + +def _unescape_braced_regex(pattern: str) -> str: + out = [] + idx = 0 + while idx < len(pattern): + char = pattern[idx] + if char == "\\" and idx + 1 < len(pattern): + next_char = pattern[idx + 1] + if next_char in "{}\\": + out.append(next_char) + else: + out.append(char) + out.append(next_char) + # END handle escaped char + idx += 2 + continue + # END handle backslash + out.append(char) + idx += 1 + # END for 
each char + return "".join(out) + + +def _find_commit_by_message( + repo: "Repo", rev: Optional[AnyGitObject], pattern: str, braced: bool = False +) -> AnyGitObject: + pattern, negated = _parse_search(_unescape_braced_regex(pattern) if braced else pattern) + regex = re.compile(pattern) + if rev is None: + commits = repo.iter_commits("--all") + else: + commits = repo.iter_commits(to_commit(cast(Object, rev)).hexsha) + # END handle starting point + + for commit in commits: + matches = regex.search(commit.message or "") is not None + if matches != negated: + return commit + # END found commit + # END for each commit + raise BadName("No commit found matching message pattern %r" % pattern) + + +def _index_lookup(repo: "Repo", spec: str) -> AnyGitObject: + if not spec: + raise ValueError("':' must be followed by a path") + # END handle empty lookup + + stage = 0 + path = spec + if len(spec) >= 2 and spec[1] == ":" and spec[0] in "0123": + stage = int(spec[0]) + path = spec[2:] + # END handle stage + + try: + return repo.index.entries[(path, stage)].to_blob(repo) + except KeyError as e: + raise BadName("Path %r did not exist in the index at stage %i" % (path, stage)) from e + + +def _tree_lookup(obj: AnyGitObject, path: str) -> AnyGitObject: + if obj.type != "tree": + obj = to_commit(cast(Object, obj)).tree + # END get tree + if not path: + return obj + return obj[path] + + +def _peel(obj: AnyGitObject, output_type: str, repo: "Repo", rev: str) -> AnyGitObject: + if output_type == "/": + return obj + if output_type.startswith("/"): + return _find_commit_by_message(repo, obj, output_type[1:], braced=True) + if output_type == "": + return deref_tag(cast("TagObject", obj)) if obj.type == "tag" else obj + if output_type == "object": + return obj + if output_type == "commit": + return to_commit(cast(Object, obj)) + if output_type == "tree": + return to_commit(cast(Object, obj)).tree if obj.type != "tree" else obj + if output_type == "blob": + obj = deref_tag(cast("TagObject", 
obj)) if obj.type == "tag" else obj + if obj.type == output_type: + return obj + # END handle matching type + raise ValueError("Could not accommodate requested object type %r, got %s" % (output_type, obj.type)) + if output_type == "tag": + if obj.type == output_type: + return obj + # END handle matching type + raise ValueError("Could not accommodate requested object type %r, got %s" % (output_type, obj.type)) + # END handle known types + raise ValueError("Invalid output type: %s ( in %s )" % (output_type, rev)) + + +def _first_rev_token(rev: str) -> Optional[int]: + for idx, char in enumerate(rev): + if char in "^~:": + return idx + if char == "@": + next_char = rev[idx + 1] if idx + 1 < len(rev) else None + if idx == 0 and next_char in (None, "^", "~", ":", "{"): + return idx + if next_char == "{": + return idx + # END handle reflog selector + # END handle at symbol + # END for each char + return None + + def rev_parse(repo: "Repo", rev: str) -> AnyGitObject: """Parse a revision string. Like :manpage:`git-rev-parse(1)`. @@ -253,135 +569,81 @@ def rev_parse(repo: "Repo", rev: str) -> AnyGitObject: :raise IndexError: If an invalid reflog index is specified. """ - # Are we in colon search mode? 
if rev.startswith(":/"): - # Colon search mode - raise NotImplementedError("commit by message search (regex)") - # END handle search + return _find_commit_by_message(repo, None, rev[2:]) + if rev.startswith(":"): + return _index_lookup(repo, rev[1:]) + # END handle top-level colon modes obj: Optional[AnyGitObject] = None ref = None - output_type = "commit" - start = 0 - parsed_to = 0 lr = len(rev) - while start < lr: - if rev[start] not in "^~:@": - start += 1 - continue - # END handle start + first_token = _first_rev_token(rev) + if first_token is None: + return name_to_object(repo, rev) + # END handle plain name + + if first_token == 0: + if rev[0] != "@": + raise ValueError("Revision specifier must start with an object name: %s" % rev) + # END handle invalid leading token + ref = _current_reflog_ref(repo) + obj = ref.commit + start = 0 if rev.startswith("@{") else 1 + else: + if rev[first_token] == "@": + ref = cast("Reference", name_to_object(repo, rev[:first_token], return_ref=True)) + obj = ref.commit + else: + obj = name_to_object(repo, rev[:first_token]) + # END handle anchor + start = first_token + # END initialize anchor + while start < lr: token = rev[start] - if obj is None: - # token is a rev name. - if start == 0: - ref = repo.head.ref - else: - if token == "@": - ref = cast("Reference", name_to_object(repo, rev[:start], return_ref=True)) - else: - obj = name_to_object(repo, rev[:start]) - # END handle token - # END handle refname - else: - if ref is not None: - obj = ref.commit - # END handle ref - # END initialize obj on first token - - start += 1 + if token == "@": + if start + 1 >= lr or rev[start + 1] != "{": + raise ValueError("Invalid @ token in revision specifier: %s" % rev) + # END handle invalid @ + end = _find_closing_brace(rev, start + 1) + obj = _apply_reflog(repo, ref if first_token != 0 and start == first_token else None, rev[start + 2 : end]) + ref = None + start = end + 1 + continue + # END handle reflog - # Try to parse {type}. 
- if start < lr and rev[start] == "{": - end = rev.find("}", start) - if end == -1: - raise ValueError("Missing closing brace to define type in %s" % rev) - output_type = rev[start + 1 : end] # Exclude brace. - - # Handle type. - if output_type == "commit": - obj = cast("TagObject", obj) - if obj and obj.type == "tag": - obj = deref_tag(obj) - else: - # Cannot do anything for non-tags. - pass - # END handle tag - elif output_type == "tree": - try: - obj = cast(AnyGitObject, obj) - obj = to_commit(obj).tree - except (AttributeError, ValueError): - pass # Error raised later. - # END exception handling - elif output_type in ("", "blob"): - obj = cast("TagObject", obj) - if obj and obj.type == "tag": - obj = deref_tag(obj) - else: - # Cannot do anything for non-tags. - pass - # END handle tag - elif token == "@": - # try single int - assert ref is not None, "Require Reference to access reflog" - revlog_index = None - try: - # Transform reversed index into the format of our revlog. - revlog_index = -(int(output_type) + 1) - except ValueError as e: - # TODO: Try to parse the other date options, using parse_date maybe. - raise NotImplementedError("Support for additional @{...} modes not implemented") from e - # END handle revlog index - - try: - entry = ref.log_entry(revlog_index) - except IndexError as e: - raise IndexError("Invalid revlog index: %i" % revlog_index) from e - # END handle index out of bound - - obj = Object.new_from_sha(repo, hex_to_bin(entry.newhexsha)) - - # Make it pass the following checks. - output_type = "" - else: - raise ValueError("Invalid output type: %s ( in %s )" % (output_type, rev)) - # END handle output type + if token == ":": + return _tree_lookup(cast(AnyGitObject, obj), rev[start + 1 :]) + # END handle path - # Empty output types don't require any specific type, its just about - # dereferencing tags. 
- if output_type and obj and obj.type != output_type: - raise ValueError("Could not accommodate requested object type %r, got %s" % (output_type, obj.type)) - # END verify output type + start += 1 - start = end + 1 # Skip brace. - parsed_to = start + if token == "^" and start < lr and rev[start] == "{": + end = _find_closing_brace(rev, start) + obj = _peel(cast(AnyGitObject, obj), rev[start + 1 : end], repo, rev) + ref = None + start = end + 1 continue # END parse type - # Try to parse a number. num = 0 - if token != ":": - found_digit = False - while start < lr: - if rev[start] in digits: - num = num * 10 + int(rev[start]) - start += 1 - found_digit = True - else: - break - # END handle number - # END number parse loop - - # No explicit number given, 1 is the default. It could be 0 though. - if not found_digit: - num = 1 - # END set default num - # END number parsing only if non-blob mode - - parsed_to = start - # Handle hierarchy walk. + found_digit = False + while start < lr: + if rev[start] in digits: + num = num * 10 + int(rev[start]) + start += 1 + found_digit = True + else: + break + # END handle number + # END number parse loop + + if not found_digit: + num = 1 + # END set default num + try: obj = cast(AnyGitObject, obj) if token == "~": @@ -391,15 +653,11 @@ def rev_parse(repo: "Repo", rev: str) -> AnyGitObject: # END for each history item to walk elif token == "^": obj = to_commit(obj) - # Must be n'th parent. - if num: + if num == 0: + pass + else: obj = obj.parents[num - 1] - elif token == ":": - if obj.type != "tree": - obj = obj.tree - # END get tree type - obj = obj[rev[start:]] - parsed_to = lr + # END handle parent else: raise ValueError("Invalid token: %r" % token) # END end handle tag @@ -410,16 +668,7 @@ def rev_parse(repo: "Repo", rev: str) -> AnyGitObject: # END exception handling # END parse loop - # Still no obj? It's probably a simple name. 
- if obj is None: - obj = name_to_object(repo, rev) - parsed_to = lr - # END handle simple name - if obj is None: raise ValueError("Revision specifier could not be parsed: %s" % rev) - if parsed_to != lr: - raise ValueError("Didn't consume complete rev spec %s, consumed part: %s" % (rev, rev[:parsed_to])) - return obj diff --git a/test/test_repo.py b/test/test_repo.py index 544b5c561..0dd3d5945 100644 --- a/test/test_repo.py +++ b/test/test_repo.py @@ -146,6 +146,23 @@ def test_commit_from_revision(self): self.assertEqual(commit.type, "commit") self.assertEqual(self.rorepo.commit(commit), commit) + @with_rw_directory + def test_commit_from_tag_starting_with_at(self, rw_dir): + repo = Repo.init(rw_dir) + with repo.config_writer() as writer: + writer.set_value("user", "name", "GitPython Tests") + writer.set_value("user", "email", "gitpython@example.com") + + tracked_file = Path(rw_dir) / "hello.txt" + tracked_file.write_text("hello") + repo.index.add([str(tracked_file)]) + commit = repo.index.commit("init") + repo.create_tag("@foo") + + self.assertEqual(repo.tags["@foo"].commit, commit) + self.assertEqual(repo.commit("@"), commit) + self.assertEqual(repo.commit("@foo"), commit) + def test_commits(self): mc = 10 commits = list(self.rorepo.iter_commits("0.1.6", max_count=mc)) diff --git a/test/test_rev_parse.py b/test/test_rev_parse.py new file mode 100644 index 000000000..371210fa9 --- /dev/null +++ b/test/test_rev_parse.py @@ -0,0 +1,138 @@ +from pathlib import Path + +import pytest + +from git import Repo +from gitdb.exc import BadName + + +def _write(repo, path, content): + full_path = Path(repo.working_tree_dir) / path + full_path.parent.mkdir(parents=True, exist_ok=True) + full_path.write_text(content) + repo.index.add([str(full_path)]) + + +@pytest.fixture +def rev_parse_repo(tmp_path): + repo = Repo.init(tmp_path) + with repo.config_writer() as writer: + writer.set_value("user", "name", "GitPython Tests") + writer.set_value("user", "email", 
"gitpython@example.com") + + _write(repo, "README.md", "root\n") + _write(repo, "CHANGES", "root changes\n") + _write(repo, "dir/file.txt", "root file\n") + root = repo.index.commit("root commit") + repo.create_tag("ann", ref=root, message="annotated tag") + + _write(repo, "README.md", "release\n") + release = repo.index.commit("release candidate") + repo.create_tag("v1.0", ref=release) + main = repo.active_branch + + side = repo.create_head("side", root) + side.checkout() + _write(repo, "side.txt", "side\n") + side_commit = repo.index.commit("side branch") + + main.checkout() + repo.git.merge("--no-ff", "side", "-m", "merge side") + merge = repo.head.commit + + repo.create_head("aaaaaaaa", merge) + repo.create_tag("@foo", ref=merge) + + return { + "repo": repo, + "root": root, + "release": release, + "side": side_commit, + "merge": merge, + "main": main, + } + + +def test_rev_parse_names_hex_and_describe_forms(rev_parse_repo): + repo = rev_parse_repo["repo"] + merge = rev_parse_repo["merge"] + + assert repo.rev_parse("@") == merge + assert repo.rev_parse("@foo") == merge + assert repo.rev_parse("aaaaaaaa") == merge + assert repo.rev_parse(merge.hexsha[:7]) == merge + assert repo.rev_parse("v1.0-1-g%s" % merge.hexsha[:7]) == merge + assert repo.rev_parse("anything-9-g%s" % merge.hexsha[:7]) == merge + assert repo.rev_parse("%s-dirty" % merge.hexsha[:7]) == merge + + +def test_rev_parse_navigation_and_peeling(rev_parse_repo): + repo = rev_parse_repo["repo"] + root = rev_parse_repo["root"] + release = rev_parse_repo["release"] + side = rev_parse_repo["side"] + merge = rev_parse_repo["merge"] + tag = repo.rev_parse("ann") + + assert repo.rev_parse("HEAD^0") == merge + assert repo.rev_parse("HEAD~0") == merge + assert repo.rev_parse("HEAD^1") == release + assert repo.rev_parse("HEAD^2") == side + assert repo.rev_parse("HEAD~") == release + assert repo.rev_parse("HEAD^^") == root + + assert tag.type == "tag" + assert repo.rev_parse("ann^{object}") == tag + assert 
repo.rev_parse("ann^{tag}") == tag + assert repo.rev_parse("ann^{}") == root + assert repo.rev_parse("ann^{commit}") == root + assert repo.rev_parse("HEAD^{tree}") == merge.tree + assert repo.rev_parse("HEAD^{/}") == merge + + +def test_rev_parse_tree_and_index_paths(rev_parse_repo): + repo = rev_parse_repo["repo"] + merge = rev_parse_repo["merge"] + + assert repo.rev_parse("HEAD:") == merge.tree + assert repo.rev_parse("HEAD:README.md") == merge.tree["README.md"] + assert repo.rev_parse("HEAD^{tree}:README.md") == merge.tree["README.md"] + assert repo.rev_parse(":README.md").binsha == merge.tree["README.md"].binsha + assert repo.rev_parse(":0:README.md").binsha == merge.tree["README.md"].binsha + + +def test_rev_parse_reflog_selectors(rev_parse_repo): + repo = rev_parse_repo["repo"] + merge = rev_parse_repo["merge"] + side = rev_parse_repo["side"] + main = rev_parse_repo["main"] + + assert repo.rev_parse("@{0}") == merge + assert repo.rev_parse("@{+0}") == merge + assert repo.rev_parse("%s@{0}" % main.name) == merge + assert repo.rev_parse("@{-1}") == side + + +def test_rev_parse_commit_message_search(rev_parse_repo): + repo = rev_parse_repo["repo"] + release = rev_parse_repo["release"] + merge = rev_parse_repo["merge"] + + assert repo.rev_parse(":/release") == release + assert repo.rev_parse("HEAD^{/release}") == release + assert repo.rev_parse("HEAD^{/!-release}") == merge + + +def test_rev_parse_rejects_invalid_object_specs(rev_parse_repo): + repo = rev_parse_repo["repo"] + + with pytest.raises(ValueError): + repo.rev_parse(":") + with pytest.raises(ValueError): + repo.rev_parse(":/") + with pytest.raises(ValueError): + repo.rev_parse("@{-0}") + with pytest.raises(ValueError): + repo.rev_parse("HEAD^{invalid}") + with pytest.raises(BadName): + repo.rev_parse(":missing") From bdbdf4bba08f59042a2e1197313ca9a2060021d0 Mon Sep 17 00:00:00 2001 From: Codex GPT-5 Date: Wed, 29 Apr 2026 06:55:03 +0800 Subject: [PATCH 4/6] Fix rev-parse CI issues --- git/repo/fun.py | 
42 ++++++++++++++++++++++++++++++++++-------- test/test_repo.py | 9 +++++++-- test/test_rev_parse.py | 2 ++ 3 files changed, 43 insertions(+), 10 deletions(-) diff --git a/git/repo/fun.py b/git/repo/fun.py index d91ce5c0b..ed00dd833 100644 --- a/git/repo/fun.py +++ b/git/repo/fun.py @@ -41,7 +41,7 @@ if TYPE_CHECKING: from git.db import GitCmdObjectDB - from git.objects import Commit, TagObject + from git.objects import Commit from git.refs.reference import Reference from git.refs.log import RefLog, RefLogEntry from git.refs.tag import Tag @@ -256,13 +256,30 @@ def _object_from_hexsha(repo: "Repo", hexsha: str) -> AnyGitObject: def _current_reflog_ref(repo: "Repo") -> SymbolicReference: - return repo.head + try: + return repo.head.ref + except TypeError: + return repo.head + # END handle detached head + + +def _common_reflog_path(repo: "Repo", ref: SymbolicReference) -> Optional[str]: + if repo.common_dir == repo.git_dir: + return None + # END handle normal repository + return SymbolicReference._get_validated_path(osp.join(repo.common_dir, "logs"), ref.path) def _ref_log(repo: "Repo", ref: SymbolicReference) -> "RefLog": try: return ref.log() except FileNotFoundError: + common_path = _common_reflog_path(repo, ref) + if common_path and osp.isfile(common_path): + from git.refs.log import RefLog + + return RefLog.from_file(common_path) + # END handle linked-worktree branch logs try: if ref.path == repo.head.ref.path: return repo.head.log() @@ -278,6 +295,12 @@ def _ref_log_entry(repo: "Repo", ref: SymbolicReference, index: int) -> "RefLogE try: return ref.log_entry(index) except FileNotFoundError: + common_path = _common_reflog_path(repo, ref) + if common_path and osp.isfile(common_path): + from git.refs.log import RefLog + + return RefLog.entry_at(common_path, index) + # END handle linked-worktree branch logs try: if ref.path == repo.head.ref.path: return repo.head.log_entry(index) @@ -464,7 +487,11 @@ def _find_commit_by_message( # END handle starting point for 
commit in commits: - matches = regex.search(commit.message or "") is not None + message = commit.message + if isinstance(message, bytes): + message = message.decode(commit.encoding, "replace") + # END handle bytes message + matches = regex.search(message or "") is not None if matches != negated: return commit # END found commit @@ -505,7 +532,7 @@ def _peel(obj: AnyGitObject, output_type: str, repo: "Repo", rev: str) -> AnyGit if output_type.startswith("/"): return _find_commit_by_message(repo, obj, output_type[1:], braced=True) if output_type == "": - return deref_tag(cast("TagObject", obj)) if obj.type == "tag" else obj + return deref_tag(obj) if obj.type == "tag" else obj if output_type == "object": return obj if output_type == "commit": @@ -513,7 +540,7 @@ def _peel(obj: AnyGitObject, output_type: str, repo: "Repo", rev: str) -> AnyGit if output_type == "tree": return to_commit(cast(Object, obj)).tree if obj.type != "tree" else obj if output_type == "blob": - obj = deref_tag(cast("TagObject", obj)) if obj.type == "tag" else obj + obj = deref_tag(obj) if obj.type == "tag" else obj if obj.type == output_type: return obj # END handle matching type @@ -615,14 +642,14 @@ def rev_parse(repo: "Repo", rev: str) -> AnyGitObject: # END handle reflog if token == ":": - return _tree_lookup(cast(AnyGitObject, obj), rev[start + 1 :]) + return _tree_lookup(obj, rev[start + 1 :]) # END handle path start += 1 if token == "^" and start < lr and rev[start] == "{": end = _find_closing_brace(rev, start) - obj = _peel(cast(AnyGitObject, obj), rev[start + 1 : end], repo, rev) + obj = _peel(obj, rev[start + 1 : end], repo, rev) ref = None start = end + 1 continue @@ -645,7 +672,6 @@ def rev_parse(repo: "Repo", rev: str) -> AnyGitObject: # END set default num try: - obj = cast(AnyGitObject, obj) if token == "~": obj = to_commit(obj) for _ in range(num): diff --git a/test/test_repo.py b/test/test_repo.py index 0dd3d5945..7262395bd 100644 --- a/test/test_repo.py +++ b/test/test_repo.py 
@@ -865,8 +865,13 @@ def test_rev_parse(self): # Currently, nothing more is supported. self.assertRaises(NotImplementedError, rev_parse, "@{1 week ago}") - # The last position. - assert rev_parse("@{1}") != head.commit + # The previous position, if this checkout has enough reflog history. + try: + previous = rev_parse("@{1}") + except IndexError: + pass + else: + self.assertNotEqual(previous, head.commit) def test_repo_odbtype(self): target_type = GitCmdObjectDB diff --git a/test/test_rev_parse.py b/test/test_rev_parse.py index 371210fa9..d96fdc1a2 100644 --- a/test/test_rev_parse.py +++ b/test/test_rev_parse.py @@ -106,9 +106,11 @@ def test_rev_parse_reflog_selectors(rev_parse_repo): merge = rev_parse_repo["merge"] side = rev_parse_repo["side"] main = rev_parse_repo["main"] + release = rev_parse_repo["release"] assert repo.rev_parse("@{0}") == merge assert repo.rev_parse("@{+0}") == merge + assert repo.rev_parse("@{1}") == release assert repo.rev_parse("%s@{0}" % main.name) == merge assert repo.rev_parse("@{-1}") == side From 6cf7ac33d449db095e8c301abba664836c16bfc8 Mon Sep 17 00:00:00 2001 From: Codex GPT-5 Date: Wed, 29 Apr 2026 07:11:05 +0800 Subject: [PATCH 5/6] Address rev-parse review feedback --- git/repo/fun.py | 56 ++++++++++++++++++++++++++++++++++-------- test/test_rev_parse.py | 35 ++++++++++++++++++++------ 2 files changed, 73 insertions(+), 18 deletions(-) diff --git a/git/repo/fun.py b/git/repo/fun.py index ed00dd833..66e7eba69 100644 --- a/git/repo/fun.py +++ b/git/repo/fun.py @@ -35,7 +35,7 @@ # Typing ---------------------------------------------------------------------- -from typing import Optional, TYPE_CHECKING, Tuple, Union, cast, overload +from typing import Iterator, Optional, TYPE_CHECKING, Tuple, Union, cast, overload from git.types import AnyGitObject, Literal, PathLike @@ -190,10 +190,6 @@ def name_to_object(repo: "Repo", name: str, return_ref: bool = False) -> Union[A # END handle short shas # END find sha if it matches - if hexsha is 
None: - hexsha = _describe_to_long(repo, name) - # END handle describe output - # If we couldn't find an object for what seemed to be a short hexsha, try to find it # as reference anyway, it could be named 'aaa' for instance. if hexsha is None: @@ -216,6 +212,10 @@ def name_to_object(repo: "Repo", name: str, return_ref: bool = False) -> Union[A # END for each base # END handle hexsha + if hexsha is None: + hexsha = _describe_to_long(repo, name) + # END handle describe output + # Didn't find any ref, this is an error. if return_ref: raise BadObject("Couldn't find reference named %r" % name) @@ -363,6 +363,8 @@ def _tracking_branch_object(repo: "Repo", ref: Optional[SymbolicReference]) -> A raise BadName("@{upstream}") from e elif isinstance(ref, Head): head = ref + elif os.fspath(ref.path).startswith("refs/heads/"): + head = Head(repo, ref.path) else: raise BadName("%s@{upstream}" % ref.name) # END handle head @@ -479,11 +481,15 @@ def _find_commit_by_message( repo: "Repo", rev: Optional[AnyGitObject], pattern: str, braced: bool = False ) -> AnyGitObject: pattern, negated = _parse_search(_unescape_braced_regex(pattern) if braced else pattern) - regex = re.compile(pattern) + try: + regex = re.compile(pattern) + except re.error as e: + raise ValueError("Invalid commit message regex %r" % pattern) from e + # END handle invalid regex if rev is None: - commits = repo.iter_commits("--all") + commits = _all_ref_commits(repo) else: - commits = repo.iter_commits(to_commit(cast(Object, rev)).hexsha) + commits = _reachable_commits([to_commit(cast(Object, rev))]) # END handle starting point for commit in commits: @@ -499,6 +505,38 @@ def _find_commit_by_message( raise BadName("No commit found matching message pattern %r" % pattern) +def _all_ref_commits(repo: "Repo") -> Iterator["Commit"]: + starts = [] + for ref in repo.references: + try: + starts.append(to_commit(cast(Object, ref.object))) + except (BadName, ValueError): + pass + # END skip refs that do not point to commits + 
# END for each ref + try: + starts.append(repo.head.commit) + except ValueError: + pass + # END handle unborn head + return _reachable_commits(starts) + + +def _reachable_commits(starts: list["Commit"]) -> Iterator["Commit"]: + seen = set() + pending = starts[:] + while pending: + pending.sort(key=lambda commit: commit.committed_date, reverse=True) + commit = pending.pop(0) + if commit.binsha in seen: + continue + # END skip seen commit + seen.add(commit.binsha) + yield commit + pending.extend(commit.parents) + # END while commits remain + + def _index_lookup(repo: "Repo", spec: str) -> AnyGitObject: if not spec: raise ValueError("':' must be followed by a path") @@ -527,8 +565,6 @@ def _tree_lookup(obj: AnyGitObject, path: str) -> AnyGitObject: def _peel(obj: AnyGitObject, output_type: str, repo: "Repo", rev: str) -> AnyGitObject: - if output_type == "/": - return obj if output_type.startswith("/"): return _find_commit_by_message(repo, obj, output_type[1:], braced=True) if output_type == "": diff --git a/test/test_rev_parse.py b/test/test_rev_parse.py index d96fdc1a2..b00347668 100644 --- a/test/test_rev_parse.py +++ b/test/test_rev_parse.py @@ -1,8 +1,15 @@ +# Copyright (C) 2026 Michael Trier (mtrier@gmail.com) and contributors +# +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + from pathlib import Path import pytest from git import Repo +from git.refs import RemoteReference +from git.refs import SymbolicReference from gitdb.exc import BadName @@ -31,14 +38,12 @@ def rev_parse_repo(tmp_path): repo.create_tag("v1.0", ref=release) main = repo.active_branch - side = repo.create_head("side", root) - side.checkout() _write(repo, "side.txt", "side\n") - side_commit = repo.index.commit("side branch") + side_commit = repo.index.commit("side branch", parent_commits=[root], head=False, skip_hooks=True) + repo.create_head("side", side_commit) - main.checkout() - repo.git.merge("--no-ff", 
"side", "-m", "merge side") - merge = repo.head.commit + merge = repo.index.commit("merge side", parent_commits=[release, side_commit], skip_hooks=True) + repo.head.log_append(side_commit.binsha, "checkout: moving from side to main", merge.binsha) repo.create_head("aaaaaaaa", merge) repo.create_tag("@foo", ref=merge) @@ -55,16 +60,21 @@ def rev_parse_repo(tmp_path): def test_rev_parse_names_hex_and_describe_forms(rev_parse_repo): repo = rev_parse_repo["repo"] + release = rev_parse_repo["release"] merge = rev_parse_repo["merge"] assert repo.rev_parse("@") == merge assert repo.rev_parse("@foo") == merge assert repo.rev_parse("aaaaaaaa") == merge assert repo.rev_parse(merge.hexsha[:7]) == merge + describe_name = "anything-9-g%s" % merge.hexsha[:7] assert repo.rev_parse("v1.0-1-g%s" % merge.hexsha[:7]) == merge - assert repo.rev_parse("anything-9-g%s" % merge.hexsha[:7]) == merge + assert repo.rev_parse(describe_name) == merge assert repo.rev_parse("%s-dirty" % merge.hexsha[:7]) == merge + repo.create_tag(describe_name, ref=release) + assert repo.rev_parse(describe_name) == release + def test_rev_parse_navigation_and_peeling(rev_parse_repo): repo = rev_parse_repo["repo"] @@ -87,7 +97,8 @@ def test_rev_parse_navigation_and_peeling(rev_parse_repo): assert repo.rev_parse("ann^{}") == root assert repo.rev_parse("ann^{commit}") == root assert repo.rev_parse("HEAD^{tree}") == merge.tree - assert repo.rev_parse("HEAD^{/}") == merge + with pytest.raises(ValueError): + repo.rev_parse("HEAD^{/}") def test_rev_parse_tree_and_index_paths(rev_parse_repo): @@ -114,6 +125,10 @@ def test_rev_parse_reflog_selectors(rev_parse_repo): assert repo.rev_parse("%s@{0}" % main.name) == merge assert repo.rev_parse("@{-1}") == side + SymbolicReference.create(repo, "refs/remotes/origin/%s" % main.name, merge) + main.set_tracking_branch(RemoteReference(repo, "refs/remotes/origin/%s" % main.name)) + assert repo.rev_parse("%s@{upstream}" % main.name) == merge + def 
test_rev_parse_commit_message_search(rev_parse_repo): repo = rev_parse_repo["repo"] @@ -132,6 +147,10 @@ def test_rev_parse_rejects_invalid_object_specs(rev_parse_repo): repo.rev_parse(":") with pytest.raises(ValueError): repo.rev_parse(":/") + with pytest.raises(ValueError): + repo.rev_parse(":/[") + with pytest.raises(ValueError): + repo.rev_parse("HEAD^{/[}") with pytest.raises(ValueError): repo.rev_parse("@{-0}") with pytest.raises(ValueError): From aee2fd5c13770954469e650f1df8f92f0183bc70 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 29 Apr 2026 08:30:21 +0800 Subject: [PATCH 6/6] bump version to 3.1.49 --- VERSION | 2 +- doc/source/changes.rst | 11 +++++++++++ git/ext/gitdb | 2 +- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/VERSION b/VERSION index 94c78f538..8335f2d61 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.1.48 +3.1.49 diff --git a/doc/source/changes.rst b/doc/source/changes.rst index 4ac67d077..020673826 100644 --- a/doc/source/changes.rst +++ b/doc/source/changes.rst @@ -2,6 +2,17 @@ Changelog ========= +3.1.49 +====== + +Safer setting of configuration values, +which could otherwise be used to inject additional configuration. + +Also a more conforming `rev-parse` implementation. + +See the following for all changes. +https://github.com/gitpython-developers/GitPython/releases/tag/3.1.49 + 3.1.48 ====== diff --git a/git/ext/gitdb b/git/ext/gitdb index 5c1b3036a..335c0f661 160000 --- a/git/ext/gitdb +++ b/git/ext/gitdb @@ -1 +1 @@ -Subproject commit 5c1b3036a6e34782e0ab6ce85e5ae64fe777fdbe +Subproject commit 335c0f66173eecdc7b2597c2b6c3d1fde795df30