Skip to content

Commit ca2e315

Browse files
committed
Added a GitIndex parser
Two things in @todo right now: - CLI flags like -cdkmosu (will be added after git status) - docstrings (will be populated in next commit)
1 parent 8639ff5 commit ca2e315

6 files changed

Lines changed: 210 additions & 44 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
2222
- Added `ngit show-ref` that lists references stored in `.git/refs`
2323
- Added `ngit tag` that can create, delete and list tags, and is based on `show-ref`'s backend
2424
- Added a stupid `ngit rev-parse` to de-reference tags, branches, short-hashes etc
25+
- Added a `ngit ls-files` to parse and show `.git/index`
2526

2627

2728
### Changed

microprojects/ngit/libngit.py

Lines changed: 54 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
from microprojects.ngit.repository import resolve_ref, ref_list, tag_list
1818
from microprojects.ngit.object_utils import object_find_f, object_read, tag_create
1919
from microprojects.ngit.ngit_utils import cat_file, ls_tree, object_hash, repo_create
20-
from microprojects.ngit.ngit_utils import checkout, show_ref
20+
from microprojects.ngit.ngit_utils import checkout, show_ref, ls_files
2121
from microprojects.ngit.log import print_logs
2222

2323

@@ -306,6 +306,54 @@ def ngit_main() -> None:
306306
)
307307

308308
# ArgParser for ngit ls-files
309+
argsp_ls_files = arg_subparser.add_parser( # ls-files
310+
"ls-files",
311+
prog="ngit ls-files",
312+
description="Show information about files in the index and the working tree",
313+
help="Show information about files in the index and the working tree",
314+
formatter_class=argparse.RawTextHelpFormatter,
315+
)
316+
argsp_ls_files.add_argument( # -z
317+
"-z",
318+
dest="null_terminator",
319+
action="store_true",
320+
help="\\0 line termination on output and do not quote filenames",
321+
)
322+
argsp_ls_files.add_argument( # --format
323+
"--format",
324+
default="", # TODO: Use a sensible default for format
325+
dest="format",
326+
help="A string that interpolates %%(fieldname) from the result being shown",
327+
)
328+
argsp_ls_files.add_argument( # --verbose
329+
"--verbose",
330+
action="store_const",
331+
const="""\
332+
%(path) @%(objectname) <%(stage)>
333+
ctime: %(ctime:iso)
334+
mtime: %(mtime:iso)
335+
mode: %(objectmode)\ttype: %(objecttype)
336+
dev: %(dev)\tino: %(ino)
337+
uid: %(uid)\tgid: %(gid)
338+
size: %(objectsize)\tflags: %(flags)
339+
""",
340+
dest="format",
341+
help="show all information about GitIndex",
342+
)
343+
argsp_ls_files.add_argument( # --debug
344+
"--debug",
345+
action="store_const",
346+
const="""\
347+
%(path)
348+
ctime: %(ctime)
349+
mtime: %(mtime)
350+
dev: %(dev)\tino: %(ino)
351+
uid: %(uid)\tgid: %(gid)
352+
size: %(objectsize)\tflags: %(flags)
353+
""",
354+
dest="format",
355+
help="After each line that describes a file, add more data about its cache entry",
356+
)
309357

310358
# ArgParser for ngit ls-tree
311359
argsp_ls_tree = arg_subparser.add_parser( # ls-tree
@@ -606,14 +654,12 @@ def cmd_add(args: argparse.Namespace) -> None:
606654
def cmd_cat_file(args: argparse.Namespace) -> None:
607655
repo: GitRepository = repo_find_f()
608656

609-
# fmt: off
610657
flag: int = (
611658
1 if args.only_error else
612659
2 if args.only_type else
613660
3 if args.only_size else
614661
4 # default flag is 4
615-
)
616-
# fmt: on
662+
) # fmt: skip
617663

618664
cat_file(repo, args.object, fmt=args.type, flag=flag)
619665

@@ -704,7 +750,10 @@ def cmd_log(args: argparse.Namespace) -> None:
704750

705751

706752
def cmd_ls_files(args: argparse.Namespace) -> None:
    """Handle `ngit ls-files`: print the entries stored in the repository's index

    Parameters:
        args (argparse.Namespace): parsed CLI arguments; reads
            `args.null_terminator` (set by `-z`) and `args.format`
            (set by `--format`, `--verbose` or `--debug`)
    """
    # TODO: Some args depends on `git status`, add them after status implemented
    repo: GitRepository = repo_find_f()
    endl: str = "\0" if args.null_terminator else "\n"

    # `--format` defaults to "", which would print one empty line per entry;
    # fall back to the bare path, which is what `git ls-files` shows without flags
    fmt: str = args.format or "%(path)"
    ls_files(repo, fmt, endl)
708757

709758

710759
def cmd_ls_tree(args: argparse.Namespace) -> None:

microprojects/ngit/log.py

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -7,20 +7,10 @@
77

88

99
def print_logs(
10-
repo: GitRepository,
11-
sha1: str,
12-
*,
13-
decorate: str,
14-
log_size: bool,
15-
max_count: int,
16-
skip: int,
17-
after: str,
18-
before: str,
19-
min_parents: int,
20-
max_parents: int,
21-
format_str: str,
22-
date_fmt: str,
23-
) -> None:
10+
repo: GitRepository, sha1: str, *, decorate: str, log_size: bool,
11+
max_count: int, skip: int, after: str, before: str, min_parents: int,
12+
max_parents: int, format_str: str, date_fmt: str
13+
) -> None: # fmt: skip
2414
"""Show commit logs, according to specified flags
2515
2616
Parameters:

microprojects/ngit/ngit_utils.py

Lines changed: 43 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
import os
22
import configparser
3+
from datetime import datetime
34

45
from microprojects.ngit.repository import GitRepository, repo_dir, repo_file, ref_list
6+
from microprojects.ngit.repository import GitIndex, GitIndexEntry
57
from microprojects.ngit.object import GitObject, GitCommit, GitBlob, GitTag, GitTree
68
from microprojects.ngit.object_utils import object_read, object_find, object_write
7-
from microprojects.ngit.object_utils import object_pick, shortify_hash
9+
from microprojects.ngit.object_utils import object_pick, shortify_hash, index_read
810

911

1012
def repo_default_config() -> configparser.ConfigParser:
@@ -122,15 +124,9 @@ def object_hash(repo: GitRepository | None, file, fmt: bytes) -> str:
122124

123125

124126
def ls_tree(
125-
repo: GitRepository,
126-
sha1: str,
127-
only_trees: bool,
128-
recurse_trees: bool,
129-
always_trees: bool,
130-
null_terminator: bool,
131-
format_str: str,
132-
_prefix: str = "",
133-
) -> None:
127+
repo: GitRepository, sha1: str, only_trees: bool, recurse_trees: bool,
128+
always_trees: bool, null_terminator: bool, format_str: str, _prefix: str = "",
129+
) -> None: # fmt: skip
134130
"""List the contents of a tree object
135131
136132
Parameters:
@@ -162,7 +158,7 @@ def prettify(leaf, format_str: str, obj_fmt: str, _prefix: str) -> str:
162158
if type(obj) is not GitTree:
163159
raise TypeError(f"fatal: {sha1} do not point to valid GitTree")
164160

165-
if not any([recurse_trees, always_trees, only_trees]):
161+
if not any([only_trees, recurse_trees, always_trees]):
166162
always_trees = True # set always_trees, if no
167163

168164
for leaf in obj.data:
@@ -183,20 +179,45 @@ def prettify(leaf, format_str: str, obj_fmt: str, _prefix: str) -> str:
183179
if always_trees or only_trees:
184180
print(prettify(leaf, format_str, obj_fmt, _prefix), end=endl)
185181
if recurse_trees:
186-
ls_tree(
187-
repo,
188-
leaf.sha1,
189-
only_trees,
190-
recurse_trees,
191-
always_trees,
192-
null_terminator,
193-
format_str,
194-
_prefix=os.path.join(_prefix, leaf.path),
195-
)
182+
ls_tree(repo, leaf.sha1, only_trees, recurse_trees, always_trees,
183+
null_terminator, format_str, _prefix=os.path.join(_prefix, leaf.path)) # fmt: skip
184+
196185
elif not only_trees:
197186
print(prettify(leaf, format_str, obj_fmt, _prefix), end=endl)
198187

199188

189+
def ls_files(repo: GitRepository, fmt: str, endl: str = "\n") -> None:
    """Print every entry of the repository's index, rendered through `fmt`

    Parameters:
        repo (GitRepository): repository whose index is loaded with `index_read`
        fmt (str): template string; every known `%(fieldname)` is replaced by
            the matching value of the entry, unknown placeholders are kept as-is
        endl (str): terminator written after each entry (default: newline)

    Known fields:
        objectmode, objecttype, objectname, objectsize, stage, path,
        ctime, mtime, ctime:iso, mtime:iso, dev, ino, uid, gid, flags

    Raises:
        KeyError: if an entry has a mode type other than 0o10, 0o12 or 0o16
    """
    import re  # local import: `re` is not among this module's top-level imports

    # A placeholder is `%(` + lowercase field name (may contain `:`) + `)`
    placeholder = re.compile(r"%\(([a-z:]+)\)")

    def prettify(entry: GitIndexEntry, format_str: str) -> str:
        """Return `format_str` with every known placeholder filled in from `entry`"""
        obj_type: str = {0o10: "blob", 0o12: "symlink", 0o16: "commit"}[entry.mode_type]

        fields: dict[str, str] = {
            "objectmode": f"{entry.mode_type:02o}{entry.mode_perms:04o}",
            "objecttype": f"{obj_type}",
            "objectname": f"{entry.sha1}",
            "objectsize": f"{entry.file_size}",
            "stage": f"{entry.flag_stage}",
            "path": f"{entry.name}",
            # Extras
            "ctime": f"{entry.ctime_s}:{entry.ctime_n}",
            "mtime": f"{entry.mtime_s}:{entry.mtime_n}",
            "ctime:iso": f"{datetime.fromtimestamp(entry.ctime_s)}",
            "mtime:iso": f"{datetime.fromtimestamp(entry.mtime_s)}",
            "dev": f"{entry.dev}",
            "ino": f"{entry.ino}",
            "uid": f"{entry.uid}",
            "gid": f"{entry.gid}",
            "flags": f"{entry.flag_assume_valid}{entry.flag_stage}",
        }

        # Substitute in a single pass, so that text coming from the entry itself
        # (e.g. a file literally named "%(dev)") is never interpolated again
        return placeholder.sub(
            lambda match: fields.get(match.group(1), match.group(0)), format_str
        )

    index: GitIndex = index_read(repo)
    for entry in index.entries:
        # honour the requested terminator (`-z` asks for "\0" instead of a newline)
        print(prettify(entry, fmt), end=endl)
219+
220+
200221
def checkout(repo: GitRepository, tree: GitTree, path: str, quiet: bool) -> None:
201222
"""Switch branches or restore working tree files
202223
@@ -222,9 +243,7 @@ def checkout(repo: GitRepository, tree: GitTree, path: str, quiet: bool) -> None
222243
file.write(obj.data)
223244

224245

225-
def show_ref(
226-
repo: GitRepository, refs: list, only_sha1: bool, deref: bool, prefx="refs"
227-
) -> None:
246+
def show_ref(repo: GitRepository, refs: list, only_sha1: bool, deref: bool, prefx="refs") -> None: # fmt: skip
228247
"""List references in repo under ref, verify them, and print relevant information
229248
230249
Parameters:

microprojects/ngit/object_utils.py

Lines changed: 68 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,11 @@
22
import os # os and os.path provide some nice filesystem abstraction routines
33
import zlib # to compress & decompress files
44
import re
5+
import math
56

67

78
from microprojects.ngit.repository import GitRepository, repo_file, resolve_ref
8-
from microprojects.ngit.repository import repo_dir
9+
from microprojects.ngit.repository import repo_dir, GitIndex, GitIndexEntry
910
from microprojects.ngit.object import GitObject, GitBlob, GitCommit, GitTag, GitTree
1011

1112

@@ -195,6 +196,7 @@ def shortify_hash(repo: GitRepository, sha1: str) -> str:
195196
196197
"""
197198
# TODO: implement shortify hash properly
199+
# TODO: add `--abbrev=[n]` in ngit sub-commands that supports it
198200
return sha1[:7]
199201

200202

@@ -232,3 +234,68 @@ def tag_create(
232234
tag_file.write(sha1 + "\n")
233235

234236
return sha1
237+
238+
239+
def index_read(repo: GitRepository) -> GitIndex:
    """Read and parse the repository's `index` file into a GitIndex

    Only the 12-byte header and the entries are parsed; whatever follows the
    last entry is ignored.

    Parameters:
        repo (GitRepository): repository whose index path is resolved with `repo_file`

    Returns:
        GitIndex: the parsed entries, or an empty GitIndex if no index file exists

    Raises:
        AssertionError: if the signature is not b"DIRC", the version is not 2,
            or an entry name is not terminated by a NUL byte
    """

    def bin_read(raw_data: bytes) -> int:
        """A helper function to convert big-endian bytes to int"""
        return int.from_bytes(raw_data, byteorder="big")

    index_file: str = repo_file(repo, "index")

    # New repositories do not have .git/index file
    if not os.path.exists(index_file):
        return GitIndex()

    with open(index_file, "rb") as file:
        raw_idx: bytes = file.read()

    # NOTE(review): the sanity checks in this function are `assert`s, so they are
    # skipped entirely when Python runs with `-O`
    signature: bytes = raw_idx[:4]
    assert signature == b"DIRC", f"signature should be b'DIRC', got {signature=}"
    version: int = bin_read(raw_idx[4:8])
    assert version == 2, f"Only version 2 GitIndex is supported, got {version=}"

    len_entries: int = bin_read(raw_idx[8:12])
    entries: list[GitIndexEntry] = []

    # Drop the 12-byte header so that `idx` is relative to the first entry; every
    # entry then starts on a multiple of 8, which the padding step below relies on
    idx: int = 0
    raw_idx = raw_idx[12:]

    for _ in range(len_entries):
        # bytes 24-25 of the entry are skipped, only bytes 26-27 carry the mode bits
        mode: int = bin_read(raw_idx[idx + 26 : idx + 28])
        flags: int = bin_read(raw_idx[idx + 60 : idx + 62])

        kwargs: dict[str, int] = {  # some kwargs to pass to GitIndexEntry()
            "ctime_s": bin_read(raw_idx[idx + 0 : idx + 4]),
            "ctime_n": bin_read(raw_idx[idx + 4 : idx + 8]),
            "mtime_s": bin_read(raw_idx[idx + 8 : idx + 12]),
            "mtime_n": bin_read(raw_idx[idx + 12 : idx + 16]),
            "dev": bin_read(raw_idx[idx + 16 : idx + 20]),
            "ino": bin_read(raw_idx[idx + 20 : idx + 24]),
            "mode_type": mode >> 12,
            "mode_perms": mode & 0x1FF,
            "uid": bin_read(raw_idx[idx + 28 : idx + 32]),
            "gid": bin_read(raw_idx[idx + 32 : idx + 36]),
            "file_size": bin_read(raw_idx[idx + 36 : idx + 40]),
            # shift the flag bits down so they are stored as small numbers
            # (0/1 and 0..3) instead of raw masks such as 0x8000 or 0x1000
            "flag_assume_valid": (flags >> 15) & 0x1,
            "flag_stage": (flags >> 12) & 0x3,
        }
        sha1: str = format(bin_read(raw_idx[idx + 40 : idx + 60]), "040x")

        idx += 62  # read 62 bytes thus far
        len_name: int = flags & 0xFFF

        if len_name < 0xFFF:  # normal case, len(name) is given
            assert raw_idx[idx + len_name] == 0x00, f"No NULL at {idx + len_name=}"
            raw_name: bytes = raw_idx[idx : idx + len_name]
            idx += len_name + 1
        else:
            # 0xFFF means "0xFFF bytes or longer": the real end of the name is
            # the first NUL at or after that many bytes
            idx_null: int = raw_idx.find(b"\x00", idx + 0xFFF)
            assert idx_null != -1, f"No NULL after name starting at {idx=}"
            raw_name = raw_idx[idx:idx_null]
            # `idx_null` is already an absolute offset into `raw_idx`, so assign
            # it instead of adding it on top of the current offset
            idx = idx_null + 1

        idx = (idx + 7) & ~7  # ceil to next multiple of 8

        entries.append(GitIndexEntry(**kwargs, sha1=sha1, name=raw_name.decode()))
    return GitIndex(version=version, entries=entries)

microprojects/ngit/repository.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,46 @@ def __init__(self, path: str, force: bool = False) -> None:
4545
raise NotImplementedError(f"unsupported repositoryformatversion: {ver}")
4646

4747

48+
class GitIndexEntry(object):
    """Plain record holding the fields of a single entry of the index

    Every constructor argument is stored unchanged under an attribute of the
    same name; no validation or conversion is performed.
    """

    def __init__(
        self, ctime_s: int, ctime_n: int, mtime_s: int, mtime_n: int, dev: int, ino: int,
        mode_type: int, mode_perms: int, uid: int, gid: int, file_size: int, sha1: str,
        flag_assume_valid: int, flag_stage: int, name: str
    ) -> None:  # fmt: skip
        """Store all given fields on the instance

        Parameters:
            ctime_s, ctime_n (int): the two parts of the entry's ctime
            mtime_s, mtime_n (int): the two parts of the entry's mtime
            dev, ino (int): device and inode numbers
            mode_type, mode_perms (int): type bits and permission bits of the mode
            uid, gid (int): owner ids
            file_size (int): size recorded for the entry
            sha1 (str): object name of the entry
            flag_assume_valid, flag_stage (int): flag fields of the entry
            name (str): path of the entry
        """

        # timestamps
        self.ctime_s, self.ctime_n = ctime_s, ctime_n
        self.mtime_s, self.mtime_n = mtime_s, mtime_n

        # filesystem identity and ownership
        self.dev, self.ino = dev, ino
        self.uid, self.gid = uid, gid

        # mode, split into its two parts
        self.mode_type, self.mode_perms = mode_type, mode_perms

        # content
        self.file_size, self.sha1 = file_size, sha1

        # flags
        self.flag_assume_valid, self.flag_stage = flag_assume_valid, flag_stage

        # path
        self.name = name
73+
74+
75+
class GitIndex(object):
    """In-memory form of the index: a format version plus a list of entries"""

    version: int = 2
    entries: list[GitIndexEntry]
    # ext  # NotImplemented
    # sha1: str  # ignored

    def __init__(self, version: int = 2, entries: list[GitIndexEntry] | None = None) -> None:
        """Create an index holding `entries`

        Parameters:
            version (int): index format version (default 2)
            entries (list[GitIndexEntry] | None): entries of the index; when
                omitted, None or empty, the instance gets its own new empty list
        """
        self.version = version
        # `None` replaces the former mutable `[]` default; `or []` still gives
        # each instance a fresh list, so no list is ever shared between instances
        self.entries = entries or []
86+
87+
4888
def repo_path(repo: GitRepository, *path: str) -> str:
4989
"""Compute path under repo's git/ directory
5090

0 commit comments

Comments
 (0)