Skip to content

Commit efbbc77

Browse files
authored
Semgrep harden pyyaml (#710)
* semgrep from core codemod to allow list of rules
* new harden pyyaml semgrep codemod
1 parent 1033d88 commit efbbc77

9 files changed

Lines changed: 200 additions & 23 deletions

src/codemodder/scripts/generate_docs.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,7 @@ class DocMetadata:
325325
"jwt-decode-verify",
326326
"use-defusedxml",
327327
"subprocess-shell-false",
328+
"harden-pyyaml",
328329
]
329330
SEMGREP_CODEMODS = {
330331
name: DocMetadata(

src/core_codemods/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@
5656
from .secure_random import SecureRandom
5757
from .semgrep.semgrep_django_secure_set_cookie import SemgrepDjangoSecureSetCookie
5858
from .semgrep.semgrep_enable_jinja2_autoescape import SemgrepEnableJinja2Autoescape
59+
from .semgrep.semgrep_harden_pyyaml import SemgrepHardenPyyaml
5960
from .semgrep.semgrep_jwt_decode_verify import SemgrepJwtDecodeVerify
6061
from .semgrep.semgrep_subprocess_shell_false import SemgrepSubprocessShellFalse
6162
from .semgrep.semgrep_use_defused_xml import SemgrepUseDefusedXml
@@ -206,5 +207,6 @@
206207
SemgrepUseDefusedXml,
207208
SemgrepSubprocessShellFalse,
208209
SemgrepDjangoSecureSetCookie,
210+
SemgrepHardenPyyaml,
209211
],
210212
)

src/core_codemods/semgrep/api.py

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,7 @@ def from_core_codemod(
2020
cls,
2121
name: str,
2222
other: CoreCodemod,
23-
rule_id: str,
24-
rule_name: str,
23+
rules: list[ToolRule],
2524
transformer: BaseTransformerPipeline | None = None,
2625
):
2726
return SemgrepCodemod(
@@ -33,25 +32,20 @@ def from_core_codemod(
3332
other.references
3433
+ [
3534
Reference(
36-
url=semgrep_url_from_id(rule_id), description=rule_name
35+
url=semgrep_url_from_id(rule.id), description=rule.name
3736
)
37+
for rule in rules
3838
]
3939
),
4040
description=other.description,
4141
tool=ToolMetadata(
4242
name="Semgrep",
43-
rules=[
44-
ToolRule(
45-
id=rule_id,
46-
name=rule_name,
47-
url=semgrep_url_from_id(rule_id),
48-
)
49-
],
43+
rules=rules,
5044
),
5145
),
5246
transformer=transformer if transformer else other.transformer,
5347
detector=SemgrepSarifFileDetector(),
54-
requested_rules=[rule_id],
48+
requested_rules=[rule.id for rule in rules],
5549
)
5650

5751
@classmethod
Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,19 @@
1+
from codemodder.codemods.base_codemod import ToolRule
12
from core_codemods.defectdojo.semgrep.django_secure_set_cookie import (
23
DjangoSecureSetCookie,
34
)
4-
from core_codemods.semgrep.api import SemgrepCodemod
5+
from core_codemods.semgrep.api import SemgrepCodemod, semgrep_url_from_id
56

67
SemgrepDjangoSecureSetCookie = SemgrepCodemod.from_core_codemod(
78
name="django-secure-set-cookie",
89
other=DjangoSecureSetCookie,
9-
rule_id="python.django.security.audit.secure-cookies.django-secure-set-cookie",
10-
rule_name="django-secure-set-cookie",
10+
rules=[
11+
ToolRule(
12+
id=(
13+
rule_id := "python.django.security.audit.secure-cookies.django-secure-set-cookie"
14+
),
15+
name="django-secure-set-cookie",
16+
url=semgrep_url_from_id(rule_id),
17+
)
18+
],
1119
)
Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,17 @@
1+
from codemodder.codemods.base_codemod import ToolRule
12
from core_codemods.enable_jinja2_autoescape import EnableJinja2Autoescape
2-
from core_codemods.semgrep.api import SemgrepCodemod
3+
from core_codemods.semgrep.api import SemgrepCodemod, semgrep_url_from_id
34

45
SemgrepEnableJinja2Autoescape = SemgrepCodemod.from_core_codemod(
56
name="enable-jinja2-autoescape",
67
other=EnableJinja2Autoescape,
7-
rule_id="python.flask.security.xss.audit.direct-use-of-jinja2.direct-use-of-jinja2",
8-
rule_name="direct-use-of-jinja2",
8+
rules=[
9+
ToolRule(
10+
id=(
11+
rule_id := "python.flask.security.xss.audit.direct-use-of-jinja2.direct-use-of-jinja2"
12+
),
13+
name="direct-use-of-jinja2",
14+
url=semgrep_url_from_id(rule_id),
15+
)
16+
],
917
)
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
from codemodder.codemods.base_codemod import ToolRule
2+
from core_codemods.harden_pyyaml import HardenPyyaml
3+
from core_codemods.semgrep.api import SemgrepCodemod, semgrep_url_from_id
4+
5+
SemgrepHardenPyyaml = SemgrepCodemod.from_core_codemod(
6+
name="harden-pyyaml",
7+
other=HardenPyyaml,
8+
rules=[
9+
ToolRule(
10+
id=(
11+
rule_id := "python.lang.security.deserialization.avoid-pyyaml-load.avoid-pyyaml-load"
12+
),
13+
name="avoid-pyyaml-load",
14+
url=semgrep_url_from_id(rule_id),
15+
),
16+
ToolRule(
17+
id=(
18+
rule_id := "python.django.security.audit.avoid-insecure-deserialization.avoid-insecure-deserialization"
19+
),
20+
name="avoid-insecure-deserialization",
21+
url=semgrep_url_from_id(rule_id),
22+
),
23+
],
24+
)
Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,22 @@
1+
from codemodder.codemods.base_codemod import ToolRule
12
from codemodder.codemods.libcst_transformer import LibcstTransformerPipeline
23
from core_codemods.jwt_decode_verify import (
34
JwtDecodeVerify,
45
JwtDecodeVerifySASTTransformer,
56
)
6-
from core_codemods.semgrep.api import SemgrepCodemod
7+
from core_codemods.semgrep.api import SemgrepCodemod, semgrep_url_from_id
78

89
SemgrepJwtDecodeVerify = SemgrepCodemod.from_core_codemod(
910
name="jwt-decode-verify",
1011
other=JwtDecodeVerify,
11-
rule_id="python.jwt.security.unverified-jwt-decode.unverified-jwt-decode",
12-
rule_name="unverified-jwt-decode",
12+
rules=[
13+
ToolRule(
14+
id=(
15+
rule_id := "python.jwt.security.unverified-jwt-decode.unverified-jwt-decode"
16+
),
17+
name="unverified-jwt-decode",
18+
url=semgrep_url_from_id(rule_id),
19+
)
20+
],
1321
transformer=LibcstTransformerPipeline(JwtDecodeVerifySASTTransformer),
1422
)
Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,17 @@
1-
from core_codemods.semgrep.api import SemgrepCodemod
1+
from codemodder.codemods.base_codemod import ToolRule
2+
from core_codemods.semgrep.api import SemgrepCodemod, semgrep_url_from_id
23
from core_codemods.subprocess_shell_false import SubprocessShellFalse
34

45
SemgrepSubprocessShellFalse = SemgrepCodemod.from_core_codemod(
56
name="subprocess-shell-false",
67
other=SubprocessShellFalse,
7-
rule_id="python.lang.security.audit.subprocess-shell-true.subprocess-shell-true",
8-
rule_name="subprocess-shell-true",
8+
rules=[
9+
ToolRule(
10+
id=(
11+
rule_id := "python.lang.security.audit.subprocess-shell-true.subprocess-shell-true"
12+
),
13+
name="subprocess-shell-true",
14+
url=semgrep_url_from_id(rule_id),
15+
)
16+
],
917
)
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
import json
2+
3+
from codemodder.codemods.test import BaseSASTCodemodTest
4+
from core_codemods.semgrep.semgrep_harden_pyyaml import SemgrepHardenPyyaml
5+
6+
7+
class TestSemgrepHardenPyyaml(BaseSASTCodemodTest):
8+
codemod = SemgrepHardenPyyaml
9+
tool = "semgrep"
10+
11+
def test_name(self):
12+
assert self.codemod.name == "harden-pyyaml"
13+
14+
def test_pyyaml(self, tmpdir):
15+
input_code = """\
16+
import yaml
17+
data = b'!!python/object/apply:subprocess.Popen \\n- ls'
18+
deserialized_data = yaml.load(data, Loader=yaml.Loader)
19+
"""
20+
expected_output = """\
21+
import yaml
22+
data = b'!!python/object/apply:subprocess.Popen \\n- ls'
23+
deserialized_data = yaml.load(data, Loader=yaml.SafeLoader)
24+
"""
25+
26+
results = {
27+
"runs": [
28+
{
29+
"results": [
30+
{
31+
"fingerprints": {"matchBasedId/v1": "123"},
32+
"locations": [
33+
{
34+
"physicalLocation": {
35+
"artifactLocation": {
36+
"uri": "code.py",
37+
"uriBaseId": "%SRCROOT%",
38+
},
39+
"region": {
40+
"endColumn": 56,
41+
"endLine": 3,
42+
"snippet": {
43+
"text": "deserialized_data = yaml.load(data, Loader=yaml.Loader)"
44+
},
45+
"startColumn": 21,
46+
"startLine": 3,
47+
},
48+
}
49+
}
50+
],
51+
"message": {
52+
"text": "Detected a possible YAML deserialization vulnerability. `yaml.unsafe_load`, `yaml.Loader`, `yaml.CLoader`, and `yaml.UnsafeLoader` are all known to be unsafe methods of deserializing YAML. An attacker with control over the YAML input could create special YAML input that allows the attacker to run arbitrary Python code. This would allow the attacker to steal files, download and install malware, or otherwise take over the machine. Use `yaml.safe_load` or `yaml.SafeLoader` instead."
53+
},
54+
"properties": {},
55+
"ruleId": "python.lang.security.deserialization.avoid-pyyaml-load.avoid-pyyaml-load",
56+
}
57+
]
58+
}
59+
]
60+
}
61+
self.run_and_assert(
62+
tmpdir,
63+
input_code,
64+
expected_output,
65+
results=json.dumps(results),
66+
)
67+
68+
def test_pyyaml_django(self, tmpdir):
69+
input_code = """\
70+
import yaml
71+
72+
def index(request):
73+
cookie = request.cookies.get('cookie')
74+
return "Hey there! {}!".format(yaml.load(cookie))
75+
"""
76+
expected_output = """\
77+
import yaml
78+
79+
def index(request):
80+
cookie = request.cookies.get('cookie')
81+
return "Hey there! {}!".format(yaml.load(cookie, Loader=yaml.SafeLoader))
82+
"""
83+
84+
results = {
85+
"runs": [
86+
{
87+
"results": [
88+
{
89+
"fingerprints": {"matchBasedId/v1": "123"},
90+
"locations": [
91+
{
92+
"physicalLocation": {
93+
"artifactLocation": {
94+
"uri": "code.py",
95+
"uriBaseId": "%SRCROOT%",
96+
},
97+
"region": {
98+
"endColumn": 53,
99+
"endLine": 5,
100+
"snippet": {
101+
"text": ' return "Hey there! {}!".format(yaml.load(cookie))'
102+
},
103+
"startColumn": 36,
104+
"startLine": 5,
105+
},
106+
}
107+
}
108+
],
109+
"message": {
110+
"text": "Avoid using insecure deserialization library, backed by `pickle`, `_pickle`, `cpickle`, `dill`, `shelve`, or `yaml`, which are known to lead to remote code execution vulnerabilities."
111+
},
112+
"properties": {},
113+
"ruleId": "python.django.security.audit.avoid-insecure-deserialization.avoid-insecure-deserialization",
114+
}
115+
]
116+
}
117+
]
118+
}
119+
self.run_and_assert(
120+
tmpdir,
121+
input_code,
122+
expected_output,
123+
results=json.dumps(results),
124+
)

0 commit comments

Comments
 (0)