Skip to content

Commit 0ede97f

Browse files
Merge commit from fork
Better repo sanitization for gitdumper
2 parents 4ce929a + 61b6c61 commit 0ede97f

File tree

4 files changed

+30
-13
lines changed

4 files changed

+30
-13
lines changed

bbot/core/helpers/git.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
import regex as re
2+
from pathlib import Path
3+
4+
5+
def sanitize_git_repo(repo_folder: Path):
    """
    Defuse a git repository by moving its config, index, and hooks out of ".git".

    Each item is relocated into the top level of the repo folder instead of being
    deleted, so its contents remain on disk for later inspection:

        .git/config -> git_config_original
        .git/index  -> git_index_original
        .git/hooks  -> git_hooks_original

    Items that are not present are skipped.

    Args:
        repo_folder: Root folder of the repository (the folder that contains ".git").

    Raises:
        OSError: If a rename fails (e.g. the destination already exists and
            cannot be replaced on this platform).
    """
    # sanitizing the git config is infeasible since there are too many different ways to do evil things
    # instead, we move it out of .git and into the repo folder, so we don't miss any secrets etc. inside
    relocations = (
        ("config", "git_config_original"),
        ("index", "git_index_original"),
        ("hooks", "git_hooks_original"),
    )
    git_folder = repo_folder / ".git"
    for name, new_name in relocations:
        source = git_folder / name
        # exists() follows symlinks and is False for a dangling one, so also check
        # is_symlink(): a dangling link left behind could become live later if its
        # target gets created
        if source.is_symlink() or source.exists():
            source.rename(repo_folder / new_name)

bbot/core/helpers/misc.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from asyncio import create_task, gather, sleep, wait_for # noqa
1818
from urllib.parse import urlparse, quote, unquote, urlunparse, urljoin # noqa F401
1919

20+
from .git import * # noqa F401
2021
from .url import * # noqa F401
2122
from ... import errors
2223
from . import regexes as bbot_regexes

bbot/modules/git_clone.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,4 +82,11 @@ async def clone_git_repository(self, repository_url):
8282
return
8383

8484
folder_name = output.stderr.split("Cloning into '")[1].split("'")[0]
85-
return folder / folder_name
85+
repo_folder = folder / folder_name
86+
87+
# sanitize the repo
88+
# this moves the git config, index file, and hooks folder out of the .git folder to prevent nasty things
89+
# Note: the index file can be regenerated by running "git checkout HEAD -- ."
90+
self.helpers.sanitize_git_repo(repo_folder)
91+
92+
return repo_folder

bbot/modules/gitdumper.py

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@ async def setup(self):
3535
else:
3636
self.output_dir = self.scan.temp_dir / "git_repos"
3737
self.helpers.mkdir(self.output_dir)
38-
self.unsafe_regex = self.helpers.re.compile(r"^\s*fsmonitor|sshcommand|askpass|editor|pager", re.IGNORECASE)
3938
self.ref_regex = self.helpers.re.compile(r"ref: refs/heads/([a-zA-Z\d_-]+)")
4039
self.obj_regex = self.helpers.re.compile(r"[a-f0-9]{40}")
4140
self.pack_regex = self.helpers.re.compile(r"pack-([a-f0-9]{40})\.pack")
@@ -131,7 +130,6 @@ async def handle_event(self, event):
131130
else:
132131
result = await self.git_fuzz(repo_url, repo_folder)
133132
if result:
134-
await self.sanitize_config(repo_folder)
135133
await self.git_checkout(repo_folder)
136134
codebase_event = self.make_event({"path": str(repo_folder)}, "FILESYSTEM", tags=["git"], parent=event)
137135
await self.emit_event(
@@ -251,15 +249,6 @@ async def download_files(self, urls, folder):
251249
self.debug(f"Unable to download git files to {folder}")
252250
return False
253251

254-
async def sanitize_config(self, folder):
255-
config_file = folder / ".git/config"
256-
if config_file.exists():
257-
with config_file.open("r", encoding="utf-8", errors="ignore") as file:
258-
content = file.read()
259-
sanitized = await self.helpers.re.sub(self.unsafe_regex, r"# \g<0>", content)
260-
with config_file.open("w", encoding="utf-8") as file:
261-
file.write(sanitized)
262-
263252
async def git_catfile(self, hash, option="-t", folder=Path()):
264253
command = ["git", "cat-file", option, hash]
265254
try:
@@ -270,8 +259,10 @@ async def git_catfile(self, hash, option="-t", folder=Path()):
270259
return output.stdout
271260

272261
async def git_checkout(self, folder):
262+
self.helpers.sanitize_git_repo(folder)
273263
self.verbose(f"Running git checkout to reconstruct the git repository at {folder}")
274-
command = ["git", "checkout", "."]
264+
# we do "checkout HEAD -- ." because the sanitization moves the index file out of .git, and it needs to be reconstructed
265+
command = ["git", "checkout", "HEAD", "--", "."]
275266
try:
276267
await self.run_process(command, env={"GIT_TERMINAL_PROMPT": "0"}, cwd=folder, check=True)
277268
except CalledProcessError as e:

0 commit comments

Comments
 (0)