Merge pull request #959 from danigm/duplicate-small

Do not consider files-duplicate for SMALL files
This commit is contained in:
Martin Liška 2022-11-10 16:16:22 +01:00 committed by GitHub
commit 51f4cbd995
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 13 additions and 1 deletion

View File

@ -18,6 +18,10 @@ class DuplicatesCheck(AbstractCheck):
- values: size of the file
"""
def __init__(self, config, output):
    """Initialise the check and cache the duplicate-size threshold.

    Both arguments are forwarded unchanged to the parent initialiser.
    The 'DuplicatesMinSize' configuration value is then stored on the
    instance as ``min_size``, falling back to 0 when the key is absent.
    """
    super().__init__(config, output)
    # Read after the parent initialiser has run, as in the original order.
    settings = self.config.configuration
    self.min_size = settings.get('DuplicatesMinSize', 0)
def check(self, pkg):
if pkg.is_source:
return
@ -31,6 +35,10 @@ class DuplicatesCheck(AbstractCheck):
if fname in pkg.ghost_files or not stat.S_ISREG(pkgfile.mode):
continue
# Skip small files
if pkgfile.size <= self.min_size:
continue
# fillup md5s and sizes dicts
md5s.setdefault(pkgfile.md5, set()).add(pkgfile)
sizes[pkgfile.md5] = pkgfile.size

View File

@ -290,6 +290,9 @@ ValidLicenseExceptions = []
# Default list of authorized PAM modules
PAMAuthorizedModules = []
# Size threshold in bytes: files of this size or smaller are skipped by the duplicates check
DuplicatesMinSize = 2
# Additional warnings on specific function calls
[WarnOnFunction]
#[WarnOnFunction.testname]

View File

@ -23,7 +23,8 @@ def test_duplicates(tmpdir, package, duplicatescheck):
assert 'E: hardlink-across-config-files /var/foo2 /etc/foo2' in out
assert 'W: files-duplicate /etc/bar3 /etc/bar:/etc/bar2' in out
assert 'W: files-duplicate /etc/strace2.txt /etc/strace1.txt' in out
assert 'E: files-duplicated-waste 270516' in out
assert 'W: files-duplicate /etc/small2 /etc/small' not in out
assert 'E: files-duplicated-waste 270544' in out
@pytest.mark.parametrize('package', ['binary/bad-crc-uncompressed'])