Python 3 fixes

2015-07-08 10:41:33 +03:00 · 2015-07-08 10:41:33 +03:00 · c9e80c7848
parent c191d7cecd
commit c9e80c7848
3 changed files with 77 additions and 83 deletions
--- a/FilesCheck.py
+++ b/FilesCheck.py
@@ -17,7 +17,7 @@ import sys
 import rpm

 from Filter import addDetails, printError, printWarning
-from Pkg import b2s, catcmd, getstatusoutput, is_utf8, is_utf8_str
+from Pkg import b2s, catcmd, getstatusoutput, is_utf8, is_utf8_bytestr
 import AbstractCheck
 import Config

@@ -394,17 +394,17 @@ class FilesCheck(AbstractCheck.AbstractCheck):

    def check(self, pkg):

-        files = pkg.files()
-
        if use_utf8:
-            for filename in files:
-                if not is_utf8_str(filename):
-                    printError(pkg, 'filename-not-utf8', filename)
+            for filename in pkg.header[rpm.RPMTAG_FILENAMES] or ():
+                if not is_utf8_bytestr(filename):
+                    printError(pkg, 'filename-not-utf8', b2s(filename))

        # Rest of the checks are for binary packages only
        if pkg.isSource():
            return

+        files = pkg.files()
+
        # Check if the package is a development package
        devel_pkg = devel_regex.search(pkg.name)

--- a/Pkg.py
+++ b/Pkg.py
@@ -13,7 +13,6 @@ import re
 import subprocess
 import sys
 import tempfile
-import unicodedata
 try:
    from urlparse import urljoin
 except:
@@ -37,6 +36,7 @@ if sys.version_info[0] > 2:
    # Blows up with Python < 3 without the exec() hack
    exec('def warn(s): print (s, file=sys.stderr)')
    long = int
+    unicode = str

    def b2s(b):
        if b is None:
@@ -102,7 +102,7 @@ def substitute_shell_vars(val, script):
        return val


-def getstatusoutput(cmd, stdoutonly=False, shell=False):
+def getstatusoutput(cmd, stdoutonly=False, shell=False, raw=False):
    '''A version of commands.getstatusoutput() which can take cmd as a
       sequence, thus making it potentially more secure.'''
    if stdoutonly:
@@ -113,12 +113,14 @@ def getstatusoutput(cmd, stdoutonly=False, shell=False):
                                stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT, close_fds=True)
    proc.stdin.close()
-    text = b2s(proc.stdout.read())
+    text = proc.stdout.read()
+    if not raw:
+        text = b2s(text)
+        if text.endswith('\n'):
+            text = text[:-1]
    sts = proc.wait()
    if sts is None:
        sts = 0
-    if text.endswith('\n'):
-        text = text[:-1]
    return sts, text

 bz2_regex = re.compile('\.t?bz2?$')
@@ -136,50 +138,32 @@ def catcmd(fname):


 def is_utf8(fname):
-    (sts, text) = getstatusoutput(catcmd(fname).split() + [fname])
-    return not sts and is_utf8_str(text)
-
-REPLACEMENT_CHAR = unicodedata.lookup('REPLACEMENT CHARACTER')
+    (sts, output) = getstatusoutput(catcmd(fname).split() + [fname], raw=True)
+    return not sts and is_utf8_bytestr(output)


-def is_utf8_str(s):
-    if hasattr(s, 'decode'):
-        # byte string
+def is_utf8_bytestr(s):
    try:
        s.decode('UTF-8')
    except:
        return False
    return True
-    # unicode string
-    return REPLACEMENT_CHAR not in s


-# TODO: PY3
-def to_utf8(string):
+def to_unicode(string):
    if string is None:
-        return ''
+        return unicode('')
    elif isinstance(string, unicode):
        return string
-    try:
-        x = unicode(string, 'ascii')
-        return string
-    except UnicodeError:
-        encodings = ['utf-8', 'iso-8859-1', 'iso-8859-15', 'iso-8859-2']
-        for enc in encodings:
+    for enc in ('utf-8', 'iso-8859-1', 'iso-8859-15', 'iso-8859-2'):
        try:
            x = unicode(string, enc)
        except UnicodeError:
            pass
        else:
            if x.encode(enc) == string:
-                    return x.encode('utf-8')
-    newstring = ''
-    for char in string:
-        if ord(char) > 127:
-            newstring = newstring + '?'
-        else:
-            newstring = newstring + char
-    return newstring
+                return x
+    return unicode(string, "ascii", errors="replace")


 def readlines(path):
@@ -494,7 +478,7 @@ class Pkg:
                os.close(fd)
            self.is_source = not self.header[rpm.RPMTAG_SOURCERPM]

-        self.name = b2s(self.header[rpm.RPMTAG_NAME])
+        self.name = self[rpm.RPMTAG_NAME]
        if self.isNoSource():
            self.arch = 'nosrc'
        elif self.isSource():
@@ -520,11 +504,11 @@ class Pkg:
        if val == []:
            return None
        else:
-            if key in (rpm.RPMTAG_VERSION, rpm.RPMTAG_RELEASE, rpm.RPMTAG_ARCH,
-                       rpm.RPMTAG_GROUP, rpm.RPMTAG_BUILDHOST,
-                       rpm.RPMTAG_LICENSE, rpm.RPMTAG_CHANGELOGNAME,
-                       rpm.RPMTAG_CHANGELOGTEXT, rpm.RPMTAG_SUMMARY,
-                       rpm.RPMTAG_DESCRIPTION, rpm.RPMTAG_HEADERI18NTABLE,
+            # Note that text tags we want to try decoding for real in TagsCheck
+            # such as summary, description and changelog are not here.
+            if key in (rpm.RPMTAG_NAME, rpm.RPMTAG_VERSION, rpm.RPMTAG_RELEASE,
+                       rpm.RPMTAG_ARCH, rpm.RPMTAG_GROUP, rpm.RPMTAG_BUILDHOST,
+                       rpm.RPMTAG_LICENSE, rpm.RPMTAG_HEADERI18NTABLE,
                       rpm.RPMTAG_PACKAGER, rpm.RPMTAG_SOURCERPM) \
            or key in (x[0] for x in SCRIPT_TAGS) \
            or key in (x[1] for x in SCRIPT_TAGS):
--- a/TagsCheck.py
+++ b/TagsCheck.py
@@ -466,9 +466,10 @@ def spell_check(pkg, str, fmt, lang, ignored):
        if checker:
            # squeeze whitespace to ease leading context check
            checker.set_text(re.sub(r'\s+', ' ', str))
-            uppername = pkg.name.upper()
            if use_utf8:
-                uppername = Pkg.to_utf8(uppername).decode('utf-8')
+                uppername = Pkg.to_unicode(pkg.header[rpm.RPMTAG_NAME]).upper()
+            else:
+                uppername = pkg.name.upper()
            upperparts = uppername.split('-')
            if lang.startswith('en'):
                ups = [x + "'S" for x in upperparts]
@@ -538,8 +539,10 @@ class TagsCheck(AbstractCheck.AbstractCheck):
    def _unexpanded_macros(self, pkg, tagname, value, is_url=False):
        if not value:
            return
-        # str(value) because value might be a list
-        for match in AbstractCheck.macro_regex.findall(str(value)):
+        if not isinstance(value, (list, tuple)):
+            value = [value]
+        for val in value:
+            for match in AbstractCheck.macro_regex.findall(val):
                # Do not warn about %XX URL escapes
                if is_url and re.match('^%[0-9A-F][0-9A-F]$', match, re.I):
                    continue
@@ -688,7 +691,7 @@ class TagsCheck(AbstractCheck.AbstractCheck):
        summary = pkg[rpm.RPMTAG_SUMMARY]
        if summary:
            if not langs:
-                self._unexpanded_macros(pkg, 'Summary', summary)
+                self._unexpanded_macros(pkg, 'Summary', Pkg.b2s(summary))
            else:
                for lang in langs:
                    self.check_summary(pkg, lang, ignored_words)
@@ -698,7 +701,8 @@ class TagsCheck(AbstractCheck.AbstractCheck):
        description = pkg[rpm.RPMTAG_DESCRIPTION]
        if description:
            if not langs:
-                self._unexpanded_macros(pkg, '%description', description)
+                self._unexpanded_macros(pkg, '%description',
+                                        Pkg.b2s(description))
            else:
                for lang in langs:
                    self.check_description(pkg, lang, ignored_words)
@@ -726,11 +730,11 @@ class TagsCheck(AbstractCheck.AbstractCheck):
        else:
            clt = pkg[rpm.RPMTAG_CHANGELOGTEXT]
            if use_version_in_changelog:
-                ret = changelog_version_regex.search(changelog[0])
+                ret = changelog_version_regex.search(Pkg.b2s(changelog[0]))
                if not ret and clt:
                    # we also allow the version specified as the first
                    # thing on the first line of the text
-                    ret = changelog_text_version_regex.search(clt[0])
+                    ret = changelog_text_version_regex.search(Pkg.b2s(clt[0]))
                if not ret:
                    printWarning(pkg, 'no-version-in-last-changelog')
                elif version and release:
@@ -751,10 +755,13 @@ class TagsCheck(AbstractCheck.AbstractCheck):
                            printWarning(pkg, 'incoherent-version-in-changelog',
                                         ret.group(1), expected)

+            if use_utf8:
                if clt:
                    changelog = changelog + clt
-            if use_utf8 and not Pkg.is_utf8_str(' '.join(changelog)):
+                for s in changelog:
+                    if not Pkg.is_utf8_bytestr(s):
                        printError(pkg, 'tag-not-utf8', '%changelog')
+                        break

            clt = pkg[rpm.RPMTAG_CHANGELOGTIME][0]
            if clt:
@@ -870,12 +877,16 @@ class TagsCheck(AbstractCheck.AbstractCheck):

    def check_description(self, pkg, lang, ignored_words):
        description = pkg.langtag(rpm.RPMTAG_DESCRIPTION, lang)
-        self._unexpanded_macros(pkg, '%%description -l %s' % lang, description)
-        utf8desc = description
        if use_utf8:
-            utf8desc = Pkg.to_utf8(description).decode('utf-8')
-        spell_check(pkg, utf8desc, '%%description -l %s', lang, ignored_words)
-        for l in utf8desc.splitlines():
+            if not Pkg.is_utf8_bytestr(description):
+                printError(pkg, 'tag-not-utf8', '%description', lang)
+            description = Pkg.to_unicode(description)
+        else:
+            description = Pkg.b2s(description)
+        self._unexpanded_macros(pkg, '%%description -l %s' % lang, description)
+        spell_check(pkg, description, '%%description -l %s', lang,
+                    ignored_words)
+        for l in description.splitlines():
            if len(l) > max_line_len:
                printError(pkg, 'description-line-too-long', lang, l)
            res = forbidden_words_regex.search(l)
@@ -885,23 +896,24 @@ class TagsCheck(AbstractCheck.AbstractCheck):
            res = tag_regex.search(l)
            if res:
                printWarning(pkg, 'tag-in-description', lang, res.group(1))
-        if use_utf8 and not Pkg.is_utf8_str(description):
-            printError(pkg, 'tag-not-utf8', '%description', lang)

    def check_summary(self, pkg, lang, ignored_words):
        summary = pkg.langtag(rpm.RPMTAG_SUMMARY, lang)
-        self._unexpanded_macros(pkg, 'Summary(%s)' % lang, summary)
-        utf8summary = summary
        if use_utf8:
-            utf8summary = Pkg.to_utf8(summary).decode('utf-8')
-        spell_check(pkg, utf8summary, 'Summary(%s)', lang, ignored_words)
+            if not Pkg.is_utf8_bytestr(summary):
+                printError(pkg, 'tag-not-utf8', 'Summary', lang)
+            summary = Pkg.to_unicode(summary)
+        else:
+            summary = Pkg.b2s(summary)
+        self._unexpanded_macros(pkg, 'Summary(%s)' % lang, summary)
+        spell_check(pkg, summary, 'Summary(%s)', lang, ignored_words)
        if '\n' in summary:
            printError(pkg, 'summary-on-multiple-lines', lang)
        if summary[0] != summary[0].upper():
            printWarning(pkg, 'summary-not-capitalized', lang, summary)
        if summary[-1] == '.':
            printWarning(pkg, 'summary-ended-with-dot', lang, summary)
-        if len(utf8summary) > max_line_len:
+        if len(summary) > max_line_len:
            printError(pkg, 'summary-too-long', lang, summary)
        if leading_space_regex.search(summary):
            printError(pkg, 'summary-has-leading-spaces', lang, summary)
@@ -916,8 +928,6 @@ class TagsCheck(AbstractCheck.AbstractCheck):
            if res:
                printWarning(pkg, 'name-repeated-in-summary', lang,
                             res.group(1))
-        if use_utf8 and not Pkg.is_utf8_str(summary):
-            printError(pkg, 'tag-not-utf8', 'Summary', lang)


 # Create an object to enable the auto registration of the test