
Commit 40bcaa7

Hash_code calculation with Vulnerability Ids (DefectDojo#6220)
* Squash all commits
* more changes
* changes after rebase
* fix for hash_code calculation
* flake8
* support for multiple aliases
1 parent 0b1f95c commit 40bcaa7

33 files changed: +329, -152 lines
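The parser changes below all follow one pattern: the single cve keyword argument on Finding is dropped, and each parser instead attaches a list of vulnerability references to the finding via unsaved_vulnerability_ids, which the new hash_code logic in dojo/models.py consumes. A minimal sketch of that pattern, assuming a DefectDojo environment where dojo.models is importable; the sample values are made up:

    from dojo.models import Finding

    finding = Finding(
        title="CVE-2021-0000 - libexample (1.2.3)",  # sample data, not taken from the commit
        severity="High",
        description="Example vulnerability in a third-party component.",
    )
    # new pseudo-field read by compute_hash_code() via get_vulnerability_ids()
    finding.unsaved_vulnerability_ids = ["CVE-2021-0000", "GHSA-xxxx-yyyy-zzzz"]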

docker/entrypoint-unit-tests-devDocker.sh

Lines changed: 1 addition & 1 deletion
@@ -10,7 +10,7 @@ cd /app
 # Unset the database URL so that we can force the DD_TEST_DATABASE_NAME (see django "DATABASES" configuration in settings.dist.py)
 unset DD_DATABASE_URL
 
-# Unset the celery broker URL so that we can force the other DD_CELERY_BROKER settings
+# Unset the celery broker URL so that we can force the other DD_CELERY_BROKER settings
 unset DD_CELERY_BROKER_URL
 
 python3 manage.py makemigrations dojo

dojo/models.py

Lines changed: 42 additions & 5 deletions
@@ -2151,15 +2151,20 @@ def compute_hash_code(self):
 
         fields_to_hash = ''
         for hashcodeField in hash_code_fields:
-            if(hashcodeField != 'endpoints'):
-                # Generically use the finding attribute having the same name, converts to str in case it's integer
-                fields_to_hash = fields_to_hash + str(getattr(self, hashcodeField))
-                deduplicationLogger.debug(hashcodeField + ' : ' + str(getattr(self, hashcodeField)))
-            else:
+            if hashcodeField == 'endpoints':
                 # For endpoints, need to compute the field
                 myEndpoints = self.get_endpoints()
                 fields_to_hash = fields_to_hash + myEndpoints
                 deduplicationLogger.debug(hashcodeField + ' : ' + myEndpoints)
+            elif hashcodeField == 'vulnerability_ids':
+                # For vulnerability_ids, need to compute the field
+                my_vulnerability_ids = self.get_vulnerability_ids()
+                fields_to_hash = fields_to_hash + my_vulnerability_ids
+                deduplicationLogger.debug(hashcodeField + ' : ' + my_vulnerability_ids)
+            else:
+                # Generically use the finding attribute having the same name, converts to str in case it's integer
+                fields_to_hash = fields_to_hash + str(getattr(self, hashcodeField))
+                deduplicationLogger.debug(hashcodeField + ' : ' + str(getattr(self, hashcodeField)))
         deduplicationLogger.debug("compute_hash_code - fields_to_hash = " + fields_to_hash)
         return self.hash_fields(fields_to_hash)
 

@@ -2168,6 +2173,35 @@ def compute_hash_code_legacy(self):
         deduplicationLogger.debug("compute_hash_code_legacy - fields_to_hash = " + fields_to_hash)
         return self.hash_fields(fields_to_hash)
 
+    # Get vulnerability_ids to use for hash_code computation
+    def get_vulnerability_ids(self):
+        vulnerability_id_str = ''
+        if self.id is None:
+            if self.unsaved_vulnerability_ids:
+                deduplicationLogger.debug("get_vulnerability_ids before the finding was saved")
+                # convert list of unsaved vulnerability_ids to the list of their canonical representation
+                vulnerability_id_str_list = list(
+                    map(
+                        lambda vulnerability_id: str(vulnerability_id),
+                        self.unsaved_vulnerability_ids
+                    ))
+                # deduplicate (usually done upon saving finding) and sort endpoints
+                vulnerability_id_str = ''.join(sorted(list(dict.fromkeys(vulnerability_id_str_list))))
+            else:
+                deduplicationLogger.debug("finding has no unsaved vulnerability references")
+        else:
+            vulnerability_ids = Vulnerability_Id.objects.filter(finding=self)
+            deduplicationLogger.debug("get_vulnerability_ids after the finding was saved. Vulnerability references count: " + str(vulnerability_ids.count()))
+            # convert list of vulnerability_ids to the list of their canonical representation
+            vulnerability_id_str_list = list(
+                map(
+                    lambda vulnerability_id: str(vulnerability_id),
+                    vulnerability_ids.all()
+                ))
+            # sort vulnerability_ids strings
+            vulnerability_id_str = ''.join(sorted(vulnerability_id_str_list))
+        return vulnerability_id_str
+
     # Get endpoints to use for hash_code computation
     # (This sometimes reports "None")
     def get_endpoints(self):

@@ -2636,6 +2670,9 @@ class Vulnerability_Id(models.Model):
     finding = models.ForeignKey(Finding, editable=False, on_delete=models.CASCADE)
     vulnerability_id = models.TextField(max_length=50, blank=False, null=False)
 
+    def __str__(self):
+        return self.vulnerability_id
+
 
 class Stub_Finding(models.Model):
     title = models.TextField(max_length=1000, blank=False, null=False)
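For reference, a standalone sketch (not part of the commit) of how the unsaved branch of get_vulnerability_ids() canonicalizes the references before they are appended to fields_to_hash and hashed; the ids are sample values:

    # sample input: what a parser might have set on an unsaved finding
    unsaved_vulnerability_ids = ["CVE-2021-0001", "GHSA-xxxx-yyyy-zzzz", "CVE-2021-0001"]

    # deduplicate (dict.fromkeys keeps the first occurrence of each id), sort, then concatenate
    vulnerability_id_str = ''.join(sorted(dict.fromkeys(str(v) for v in unsaved_vulnerability_ids)))

    print(vulnerability_id_str)  # CVE-2021-0001GHSA-xxxx-yyyy-zzzz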

dojo/settings/settings.dist.py

Lines changed: 19 additions & 18 deletions
@@ -1053,43 +1053,43 @@ def saml2_attrib_map_format(dict):
     # Including the severity in the hash_code keeps those findings not duplicate
     'Anchore Engine Scan': ['title', 'severity', 'component_name', 'component_version', 'file_path'],
     'Anchore Grype': ['title', 'severity', 'component_name', 'component_version'],
-    'Aqua Scan': ['severity', 'cve', 'component_name', 'component_version'],
+    'Aqua Scan': ['severity', 'vulnerability_ids', 'component_name', 'component_version'],
     'Bandit Scan': ['file_path', 'line', 'vuln_id_from_tool'],
-    'CargoAudit Scan': ['cve', 'severity', 'component_name', 'component_version', 'vuln_id_from_tool'],
+    'CargoAudit Scan': ['vulnerability_ids', 'severity', 'component_name', 'component_version', 'vuln_id_from_tool'],
     'Checkmarx Scan': ['cwe', 'severity', 'file_path'],
-    'Checkmarx OSA': ['cve', 'component_name'],
+    'Checkmarx OSA': ['vulnerability_ids', 'component_name'],
     'Cloudsploit Scan': ['title', 'description'],
     'SonarQube Scan': ['cwe', 'severity', 'file_path'],
     'SonarQube API Import': ['title', 'file_path', 'line'],
-    'Dependency Check Scan': ['cve', 'cwe', 'file_path'],
+    'Dependency Check Scan': ['vulnerability_ids', 'cwe', 'file_path'],
     'Dockle Scan': ['title', 'description', 'vuln_id_from_tool'],
-    'Dependency Track Finding Packaging Format (FPF) Export': ['component_name', 'component_version', 'cwe', 'cve'],
+    'Dependency Track Finding Packaging Format (FPF) Export': ['component_name', 'component_version', 'cwe', 'vulnerability_ids'],
     'Mobsfscan Scan': ['title', 'severity', 'cwe'],
-    'Nessus Scan': ['title', 'severity', 'cve', 'cwe'],
-    'Nexpose Scan': ['title', 'severity', 'cve', 'cwe'],
+    'Nessus Scan': ['title', 'severity', 'vulnerability_ids', 'cwe'],
+    'Nexpose Scan': ['title', 'severity', 'vulnerability_ids', 'cwe'],
     # possible improvement: in the scanner put the library name into file_path, then dedup on cwe + file_path + severity
-    'NPM Audit Scan': ['title', 'severity', 'file_path', 'cve', 'cwe'],
+    'NPM Audit Scan': ['title', 'severity', 'file_path', 'vulnerability_ids', 'cwe'],
     # possible improvement: in the scanner put the library name into file_path, then dedup on cwe + file_path + severity
-    'Yarn Audit Scan': ['title', 'severity', 'file_path', 'cve', 'cwe'],
-    # possible improvement: in the scanner put the library name into file_path, then dedup on cve + file_path + severity
+    'Yarn Audit Scan': ['title', 'severity', 'file_path', 'vulnerability_ids', 'cwe'],
+    # possible improvement: in the scanner put the library name into file_path, then dedup on vulnerability_ids + file_path + severity
     'Whitesource Scan': ['title', 'severity', 'description'],
     'ZAP Scan': ['title', 'cwe', 'severity'],
     'Qualys Scan': ['title', 'severity'],
     # 'Qualys Webapp Scan': ['title', 'unique_id_from_tool'],
-    'PHP Symfony Security Check': ['title', 'cve'],
-    'Clair Scan': ['title', 'cve', 'description', 'severity'],
+    'PHP Symfony Security Check': ['title', 'vulnerability_ids'],
+    'Clair Scan': ['title', 'vulnerability_ids', 'description', 'severity'],
     'Clair Klar Scan': ['title', 'description', 'severity'],
     # for backwards compatibility because someone decided to rename this scanner:
-    'Symfony Security Check': ['title', 'cve'],
-    'DSOP Scan': ['cve'],
+    'Symfony Security Check': ['title', 'vulnerability_ids'],
+    'DSOP Scan': ['vulnerability_ids'],
     'Acunetix Scan': ['title', 'description'],
     'Terrascan Scan': ['vuln_id_from_tool', 'title', 'severity', 'file_path', 'line', 'component_name'],
-    'Trivy Scan': ['title', 'severity', 'cve', 'cwe'],
+    'Trivy Scan': ['title', 'severity', 'vulnerability_ids', 'cwe'],
     'TFSec Scan': ['severity', 'vuln_id_from_tool', 'file_path', 'line'],
     'Snyk Scan': ['vuln_id_from_tool', 'file_path', 'component_name', 'component_version'],
-    'GitLab Dependency Scanning Report': ['title', 'cve', 'file_path', 'component_name', 'component_version'],
+    'GitLab Dependency Scanning Report': ['title', 'vulnerability_ids', 'file_path', 'component_name', 'component_version'],
     'SpotBugs Scan': ['cwe', 'severity', 'file_path', 'line'],
-    'JFrog Xray Unified Scan': ['cve', 'file_path', 'component_name', 'component_version'],
+    'JFrog Xray Unified Scan': ['vulnerability_ids', 'file_path', 'component_name', 'component_version'],
     'Scout Suite Scan': ['file_path', 'vuln_id_from_tool'],  # for now we use file_path as there is no attribute for "service"
     'AWS Security Hub Scan': ['unique_id_from_tool'],
     'Meterian Scan': ['cwe', 'component_name', 'component_version', 'description', 'severity'],

@@ -1145,7 +1145,7 @@ def saml2_attrib_map_format(dict):
 # List of fields that are known to be usable in hash_code computation)
 # 'endpoints' is a pseudo field that uses the endpoints (for dynamic scanners)
 # 'unique_id_from_tool' is often not needed here as it can be used directly in the dedupe algorithm, but it's also possible to use it for hashing
-HASHCODE_ALLOWED_FIELDS = ['title', 'cwe', 'cve', 'line', 'file_path', 'component_name', 'component_version', 'description', 'endpoints', 'unique_id_from_tool', 'severity', 'vuln_id_from_tool']
+HASHCODE_ALLOWED_FIELDS = ['title', 'cwe', 'vulnerability_ids', 'line', 'file_path', 'component_name', 'component_version', 'description', 'endpoints', 'unique_id_from_tool', 'severity', 'vuln_id_from_tool']
 
 # Adding fields to the hash_code calculation regardless of the previous settings
 HASH_CODE_FIELDS_ALWAYS = ['service']

@@ -1442,4 +1442,5 @@ def saml2_attrib_map_format(dict):
     'OSV': 'https://osv.dev/vulnerability/',
     'PYSEC': 'https://osv.dev/vulnerability/',
     'SNYK': 'https://snyk.io/vuln/',
+    'RUSTSEC': 'https://rustsec.org/advisories/',
 }
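With 'vulnerability_ids' now listed in HASHCODE_ALLOWED_FIELDS, per-scanner entries such as the ones above may reference it. A sketch of a local override using the same entry format; the dict name HASHCODE_FIELDS_PER_SCANNER and the scanner name are assumptions, since neither appears in this excerpt:

    # local_settings.py (sketch only)
    HASHCODE_FIELDS_PER_SCANNER = {
        # dedupe this (hypothetical) scanner on title + severity + vulnerability references + component
        'My Custom Scan': ['title', 'severity', 'vulnerability_ids', 'component_name'],
    }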

dojo/tools/aqua/parser.py

Lines changed: 12 additions & 7 deletions
@@ -42,7 +42,7 @@ def get_items(self, tree, test):
 def get_item(resource, vuln, test):
     resource_name = resource.get('name', resource.get('path'))
     resource_version = resource.get('version', 'No version')
-    cve = vuln.get('name', 'No CVE')
+    vulnerability_id = vuln.get('name', 'No CVE')
     fix_version = vuln.get('fix_version', 'None')
     description = vuln.get('description', 'No description.')
     cvssv3 = None

@@ -84,24 +84,27 @@ def get_item(resource, vuln, test):
     severity = severity_of(score)
     severity_justification += "\n{}".format(used_for_classification)
 
-    return Finding(
-        title=cve + " - " + resource_name + " (" + resource_version + ") ",
+    finding = Finding(
+        title=vulnerability_id + " - " + resource_name + " (" + resource_version + ") ",
         test=test,
         severity=severity,
         severity_justification=severity_justification,
         cwe=0,
-        cve=cve,
         cvssv3=cvssv3,
         description=description.strip(),
         mitigation=fix_version,
         references=url,
         component_name=resource.get('name'),
         component_version=resource.get('version'),
         impact=severity)
+    if vulnerability_id != 'No CVE':
+        finding.unsaved_vulnerability_ids = [vulnerability_id]
+
+    return finding
 
 
 def get_item_v2(item, test):
-    cve = item['name']
+    vulnerability_id = item['name']
     file_path = item['file']
     url = item.get('url')
     severity = severity_of(float(item['score']))

@@ -115,15 +118,17 @@ def get_item_v2(item, test):
     else:
         mitigation = 'No known mitigation'
 
-    return Finding(title=str(cve) + ': ' + str(file_path),
+    finding = Finding(title=str(vulnerability_id) + ': ' + str(file_path),
                    description=description,
                    url=url,
                    cwe=0,
-                   cve=cve,
                    test=test,
                    severity=severity,
                    impact=severity,
                    mitigation=mitigation)
+    finding.unsaved_vulnerability_ids = [vulnerability_id]
+
+    return finding
 
 
 def aqua_severity_of(score):

dojo/tools/cargo_audit/parser.py

Lines changed: 5 additions & 6 deletions
@@ -24,6 +24,7 @@ def get_findings(self, filename, test):
         for item in data.get('vulnerabilities').get('list'):
             advisory = item.get('advisory')
             vuln_id = advisory.get('id')
+            vulnerability_ids = [advisory.get('id')]
             if "categories" in advisory:
                 categories = f"**Categories:** {', '.join(advisory['categories'])}"
             else:

@@ -38,10 +39,8 @@
             references = f"{advisory.get('url')}\n" + '\n'.join(advisory['references'])
             date = advisory.get('date')
 
-            if len(advisory.get('aliases')) != 0:
-                cve = advisory.get('aliases')[0]
-            else:
-                cve = None
+            for alias in advisory.get('aliases', []):
+                vulnerability_ids.append(alias)
 
             package_name = item.get('package').get('name')
             package_version = item.get('package').get('version')

@@ -56,7 +55,7 @@
             except KeyError:
                 mitigation = "No information about patched version"
             dupe_key = hashlib.sha256(
-                (vuln_id + str(cve) + date + package_name + package_version).encode('utf-8')
+                (vuln_id + date + package_name + package_version).encode('utf-8')
             ).hexdigest()
 
             if dupe_key in dupes:

@@ -67,7 +66,6 @@
                     title=title,
                     test=test,
                     severity=severity,
-                    cve=cve,
                     tags=tags,
                     description=description,
                     component_name=package_name,

@@ -78,5 +76,6 @@
                     references=references,
                     mitigation=mitigation
                 )
+                finding.unsaved_vulnerability_ids = vulnerability_ids
                 dupes[dupe_key] = finding
         return list(dupes.values())
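This is the "support for multiple aliases" item from the commit message: the RUSTSEC advisory id plus every alias is now collected into unsaved_vulnerability_ids instead of keeping only the first alias as cve. A small sketch of that accumulation with made-up advisory data (the RUSTSEC prefix links out via the 'RUSTSEC' entry added to settings.dist.py above):

    # sample advisory data, mirroring the loop in the diff above
    advisory = {
        'id': 'RUSTSEC-2021-0000',
        'aliases': ['CVE-2021-0002', 'GHSA-xxxx-yyyy-zzzz'],
    }

    vulnerability_ids = [advisory.get('id')]
    for alias in advisory.get('aliases', []):
        vulnerability_ids.append(alias)

    print(vulnerability_ids)
    # ['RUSTSEC-2021-0000', 'CVE-2021-0002', 'GHSA-xxxx-yyyy-zzzz']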

dojo/tools/checkmarx_osa/parser.py

Lines changed: 4 additions & 3 deletions
@@ -38,17 +38,16 @@ def get_findings(self, filehandle, test):
 
                 # Possible status as per checkmarx 9.2: TO_VERIFY, NOT_EXPLOITABLE, CONFIRMED, URGENT, PROPOSED_NOT_EXPLOITABLE
                 status = item['state']['name']
-                cve = item.get('cveName', 'NC')
+                vulnerability_id = item.get('cveName', 'NC')
                 finding_item = Finding(
-                    title='{0} {1} | {2}'.format(library['name'], library['version'], cve),
+                    title='{0} {1} | {2}'.format(library['name'], library['version'], vulnerability_id),
                     severity=item['severity']['name'],
                     description=item.get('description', 'NC'),
                     unique_id_from_tool=item.get('id', None),
                     references=item.get('url', None),
                     mitigation=item.get('recommendations', None),
                     component_name=library['name'],
                     component_version=library['version'],
-                    cve=cve,
                     # 1035 is "Using Components with Known Vulnerabilities"
                     # Possible improvment: get the CWE from the CVE using some database?
                     # nvd.nist.gov has the info; see for eg https://nvd.nist.gov/vuln/detail/CVE-2020-25649 "Weakness Enumeration"

@@ -63,6 +62,8 @@ def get_findings(self, filehandle, test):
                     verified=status != 'TO_VERIFY' and status != 'NOT_EXPLOITABLE' and status != 'PROPOSED_NOT_EXPLOITABLE',
                     test=test
                 )
+                if vulnerability_id != 'NC':
+                    finding_item.unsaved_vulnerability_ids = [vulnerability_id]
                 items.append(finding_item)
         return items
 

dojo/tools/clair/parser.py

Lines changed: 3 additions & 1 deletion
@@ -56,7 +56,6 @@ def get_item(item_node, test):
                       references=item_node['link'],
                       component_name=item_node['featurename'],
                       component_version=item_node['featureversion'],
-                      cve=item_node['vulnerability'],
                       false_p=False,
                       duplicate=False,
                       out_of_scope=False,

@@ -65,4 +64,7 @@ def get_item(item_node, test):
                       dynamic_finding=False,
                       impact="No impact provided")
 
+    if item_node['vulnerability']:
+        finding.unsaved_vulnerability_ids = [item_node['vulnerability']]
+
     return finding

dojo/tools/dependency_check/parser.py

Lines changed: 10 additions & 6 deletions
@@ -23,7 +23,7 @@ class DependencyCheckParser(object):
 
     def add_finding(self, finding, dupes):
         key_str = '|'.join([
-            str(finding.cve),
+            str(finding.title),
            str(finding.cwe),
            str(finding.file_path).lower()
        ])

@@ -137,11 +137,11 @@ def get_finding_from_vulnerability(self, dependency, related_dependency, vulnera
         # I need the notes field since this is how the suppression is documented.
         notes = vulnerability.findtext(f'.//{namespace}notes')
 
-        cve = name[:28]
-        if cve and not cve.startswith('CVE'):
+        vulnerability_id = name[:28]
+        if vulnerability_id and not vulnerability_id.startswith('CVE'):
             # for vulnerability sources which have a CVE, it is the start of the 'name'.
             # for other sources, we have to set it to None
-            cve = None
+            vulnerability_id = None
 
         # Use CWE-1035 as fallback
         cwe = 1035  # Vulnerable Third Party Component

@@ -219,12 +219,11 @@ def get_finding_from_vulnerability(self, dependency, related_dependency, vulnera
         description += '\n**Filepath:** ' + str(dependency_filepath)
         active = True
 
-        return Finding(
+        finding = Finding(
             title=f'{component_name}:{component_version} | {name}',
             file_path=dependency_filename,
             test=test,
             cwe=cwe,
-            cve=cve,
             description=description,
             severity=severity,
             mitigation=mitigation,

@@ -237,6 +236,11 @@ def get_finding_from_vulnerability(self, dependency, related_dependency, vulnera
             component_version=component_version,
         )
 
+        if vulnerability_id:
+            finding.unsaved_vulnerability_ids = [vulnerability_id]
+
+        return finding
+
     def get_scan_types(self):
         return ["Dependency Check Scan"]
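Since the cve field no longer exists on Finding, the in-parser dedupe key built by add_finding() switches from cve|cwe|file_path to title|cwe|file_path. A sketch of the resulting key with sample values (the title format matches the f-string in the diff above):

    title = 'jackson-databind:2.9.10 | CVE-2020-25649'  # sample component and CVE
    cwe = 1035
    file_path = 'pom.xml'

    key_str = '|'.join([str(title), str(cwe), str(file_path).lower()])
    print(key_str)  # jackson-databind:2.9.10 | CVE-2020-25649|1035|pom.xml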
