Python 3 fixes

This commit is contained in:
Ville Skyttä 2015-07-08 10:41:33 +03:00
parent c191d7cecd
commit c9e80c7848
3 changed files with 77 additions and 83 deletions

View File

@ -17,7 +17,7 @@ import sys
import rpm import rpm
from Filter import addDetails, printError, printWarning from Filter import addDetails, printError, printWarning
from Pkg import b2s, catcmd, getstatusoutput, is_utf8, is_utf8_str from Pkg import b2s, catcmd, getstatusoutput, is_utf8, is_utf8_bytestr
import AbstractCheck import AbstractCheck
import Config import Config
@ -394,17 +394,17 @@ class FilesCheck(AbstractCheck.AbstractCheck):
def check(self, pkg): def check(self, pkg):
files = pkg.files()
if use_utf8: if use_utf8:
for filename in files: for filename in pkg.header[rpm.RPMTAG_FILENAMES] or ():
if not is_utf8_str(filename): if not is_utf8_bytestr(filename):
printError(pkg, 'filename-not-utf8', filename) printError(pkg, 'filename-not-utf8', b2s(filename))
# Rest of the checks are for binary packages only # Rest of the checks are for binary packages only
if pkg.isSource(): if pkg.isSource():
return return
files = pkg.files()
# Check if the package is a development package # Check if the package is a development package
devel_pkg = devel_regex.search(pkg.name) devel_pkg = devel_regex.search(pkg.name)

78
Pkg.py
View File

@ -13,7 +13,6 @@ import re
import subprocess import subprocess
import sys import sys
import tempfile import tempfile
import unicodedata
try: try:
from urlparse import urljoin from urlparse import urljoin
except: except:
@ -37,6 +36,7 @@ if sys.version_info[0] > 2:
# Blows up with Python < 3 without the exec() hack # Blows up with Python < 3 without the exec() hack
exec('def warn(s): print (s, file=sys.stderr)') exec('def warn(s): print (s, file=sys.stderr)')
long = int long = int
unicode = str
def b2s(b): def b2s(b):
if b is None: if b is None:
@ -102,7 +102,7 @@ def substitute_shell_vars(val, script):
return val return val
def getstatusoutput(cmd, stdoutonly=False, shell=False): def getstatusoutput(cmd, stdoutonly=False, shell=False, raw=False):
'''A version of commands.getstatusoutput() which can take cmd as a '''A version of commands.getstatusoutput() which can take cmd as a
sequence, thus making it potentially more secure.''' sequence, thus making it potentially more secure.'''
if stdoutonly: if stdoutonly:
@ -113,12 +113,14 @@ def getstatusoutput(cmd, stdoutonly=False, shell=False):
stdout=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.STDOUT, close_fds=True) stderr=subprocess.STDOUT, close_fds=True)
proc.stdin.close() proc.stdin.close()
text = b2s(proc.stdout.read()) text = proc.stdout.read()
if not raw:
text = b2s(text)
if text.endswith('\n'):
text = text[:-1]
sts = proc.wait() sts = proc.wait()
if sts is None: if sts is None:
sts = 0 sts = 0
if text.endswith('\n'):
text = text[:-1]
return sts, text return sts, text
bz2_regex = re.compile('\.t?bz2?$') bz2_regex = re.compile('\.t?bz2?$')
@ -136,50 +138,32 @@ def catcmd(fname):
def is_utf8(fname): def is_utf8(fname):
(sts, text) = getstatusoutput(catcmd(fname).split() + [fname]) (sts, output) = getstatusoutput(catcmd(fname).split() + [fname], raw=True)
return not sts and is_utf8_str(text) return not sts and is_utf8_bytestr(output)
REPLACEMENT_CHAR = unicodedata.lookup('REPLACEMENT CHARACTER')
def is_utf8_str(s): def is_utf8_bytestr(s):
if hasattr(s, 'decode'): try:
# byte string s.decode('UTF-8')
try: except:
s.decode('UTF-8') return False
except: return True
return False
return True
# unicode string
return REPLACEMENT_CHAR not in s
# TODO: PY3 def to_unicode(string):
def to_utf8(string):
if string is None: if string is None:
return '' return unicode('')
elif isinstance(string, unicode): elif isinstance(string, unicode):
return string return string
try: for enc in ('utf-8', 'iso-8859-1', 'iso-8859-15', 'iso-8859-2'):
x = unicode(string, 'ascii') try:
return string x = unicode(string, enc)
except UnicodeError: except UnicodeError:
encodings = ['utf-8', 'iso-8859-1', 'iso-8859-15', 'iso-8859-2'] pass
for enc in encodings:
try:
x = unicode(string, enc)
except UnicodeError:
pass
else:
if x.encode(enc) == string:
return x.encode('utf-8')
newstring = ''
for char in string:
if ord(char) > 127:
newstring = newstring + '?'
else: else:
newstring = newstring + char if x.encode(enc) == string:
return newstring return x
return unicode(string, "ascii", errors=replace)
def readlines(path): def readlines(path):
@ -494,7 +478,7 @@ class Pkg:
os.close(fd) os.close(fd)
self.is_source = not self.header[rpm.RPMTAG_SOURCERPM] self.is_source = not self.header[rpm.RPMTAG_SOURCERPM]
self.name = b2s(self.header[rpm.RPMTAG_NAME]) self.name = self[rpm.RPMTAG_NAME]
if self.isNoSource(): if self.isNoSource():
self.arch = 'nosrc' self.arch = 'nosrc'
elif self.isSource(): elif self.isSource():
@ -520,11 +504,11 @@ class Pkg:
if val == []: if val == []:
return None return None
else: else:
if key in (rpm.RPMTAG_VERSION, rpm.RPMTAG_RELEASE, rpm.RPMTAG_ARCH, # Note that text tags we want to try decoding for real in TagsCheck
rpm.RPMTAG_GROUP, rpm.RPMTAG_BUILDHOST, # such as summary, description and changelog are not here.
rpm.RPMTAG_LICENSE, rpm.RPMTAG_CHANGELOGNAME, if key in (rpm.RPMTAG_NAME, rpm.RPMTAG_VERSION, rpm.RPMTAG_RELEASE,
rpm.RPMTAG_CHANGELOGTEXT, rpm.RPMTAG_SUMMARY, rpm.RPMTAG_ARCH, rpm.RPMTAG_GROUP, rpm.RPMTAG_BUILDHOST,
rpm.RPMTAG_DESCRIPTION, rpm.RPMTAG_HEADERI18NTABLE, rpm.RPMTAG_LICENSE, rpm.RPMTAG_HEADERI18NTABLE,
rpm.RPMTAG_PACKAGER, rpm.RPMTAG_SOURCERPM) \ rpm.RPMTAG_PACKAGER, rpm.RPMTAG_SOURCERPM) \
or key in (x[0] for x in SCRIPT_TAGS) \ or key in (x[0] for x in SCRIPT_TAGS) \
or key in (x[1] for x in SCRIPT_TAGS): or key in (x[1] for x in SCRIPT_TAGS):

View File

@ -466,9 +466,10 @@ def spell_check(pkg, str, fmt, lang, ignored):
if checker: if checker:
# squeeze whitespace to ease leading context check # squeeze whitespace to ease leading context check
checker.set_text(re.sub(r'\s+', ' ', str)) checker.set_text(re.sub(r'\s+', ' ', str))
uppername = pkg.name.upper()
if use_utf8: if use_utf8:
uppername = Pkg.to_utf8(uppername).decode('utf-8') uppername = Pkg.to_unicode(pkg.header[rpm.RPMTAG_NAME]).upper()
else:
uppername = pkg.name.upper()
upperparts = uppername.split('-') upperparts = uppername.split('-')
if lang.startswith('en'): if lang.startswith('en'):
ups = [x + "'S" for x in upperparts] ups = [x + "'S" for x in upperparts]
@ -538,12 +539,14 @@ class TagsCheck(AbstractCheck.AbstractCheck):
def _unexpanded_macros(self, pkg, tagname, value, is_url=False): def _unexpanded_macros(self, pkg, tagname, value, is_url=False):
if not value: if not value:
return return
# str(value) because value might be a list if not isinstance(value, (list, tuple)):
for match in AbstractCheck.macro_regex.findall(str(value)): value = [value]
# Do not warn about %XX URL escapes for val in value:
if is_url and re.match('^%[0-9A-F][0-9A-F]$', match, re.I): for match in AbstractCheck.macro_regex.findall(val):
continue # Do not warn about %XX URL escapes
printWarning(pkg, 'unexpanded-macro', tagname, match) if is_url and re.match('^%[0-9A-F][0-9A-F]$', match, re.I):
continue
printWarning(pkg, 'unexpanded-macro', tagname, match)
def check(self, pkg): def check(self, pkg):
@ -688,7 +691,7 @@ class TagsCheck(AbstractCheck.AbstractCheck):
summary = pkg[rpm.RPMTAG_SUMMARY] summary = pkg[rpm.RPMTAG_SUMMARY]
if summary: if summary:
if not langs: if not langs:
self._unexpanded_macros(pkg, 'Summary', summary) self._unexpanded_macros(pkg, 'Summary', Pkg.b2s(summary))
else: else:
for lang in langs: for lang in langs:
self.check_summary(pkg, lang, ignored_words) self.check_summary(pkg, lang, ignored_words)
@ -698,7 +701,8 @@ class TagsCheck(AbstractCheck.AbstractCheck):
description = pkg[rpm.RPMTAG_DESCRIPTION] description = pkg[rpm.RPMTAG_DESCRIPTION]
if description: if description:
if not langs: if not langs:
self._unexpanded_macros(pkg, '%description', description) self._unexpanded_macros(pkg, '%description',
Pkg.b2s(description))
else: else:
for lang in langs: for lang in langs:
self.check_description(pkg, lang, ignored_words) self.check_description(pkg, lang, ignored_words)
@ -726,11 +730,11 @@ class TagsCheck(AbstractCheck.AbstractCheck):
else: else:
clt = pkg[rpm.RPMTAG_CHANGELOGTEXT] clt = pkg[rpm.RPMTAG_CHANGELOGTEXT]
if use_version_in_changelog: if use_version_in_changelog:
ret = changelog_version_regex.search(changelog[0]) ret = changelog_version_regex.search(Pkg.b2s(changelog[0]))
if not ret and clt: if not ret and clt:
# we also allow the version specified as the first # we also allow the version specified as the first
# thing on the first line of the text # thing on the first line of the text
ret = changelog_text_version_regex.search(clt[0]) ret = changelog_text_version_regex.search(Pkg.b2s(clt[0]))
if not ret: if not ret:
printWarning(pkg, 'no-version-in-last-changelog') printWarning(pkg, 'no-version-in-last-changelog')
elif version and release: elif version and release:
@ -751,10 +755,13 @@ class TagsCheck(AbstractCheck.AbstractCheck):
printWarning(pkg, 'incoherent-version-in-changelog', printWarning(pkg, 'incoherent-version-in-changelog',
ret.group(1), expected) ret.group(1), expected)
if clt: if use_utf8:
changelog = changelog + clt if clt:
if use_utf8 and not Pkg.is_utf8_str(' '.join(changelog)): changelog = changelog + clt
printError(pkg, 'tag-not-utf8', '%changelog') for s in changelog:
if not Pkg.is_utf8_bytestr(s):
printError(pkg, 'tag-not-utf8', '%changelog')
break
clt = pkg[rpm.RPMTAG_CHANGELOGTIME][0] clt = pkg[rpm.RPMTAG_CHANGELOGTIME][0]
if clt: if clt:
@ -870,12 +877,16 @@ class TagsCheck(AbstractCheck.AbstractCheck):
def check_description(self, pkg, lang, ignored_words): def check_description(self, pkg, lang, ignored_words):
description = pkg.langtag(rpm.RPMTAG_DESCRIPTION, lang) description = pkg.langtag(rpm.RPMTAG_DESCRIPTION, lang)
self._unexpanded_macros(pkg, '%%description -l %s' % lang, description)
utf8desc = description
if use_utf8: if use_utf8:
utf8desc = Pkg.to_utf8(description).decode('utf-8') if not Pkg.is_utf8_bytestr(description):
spell_check(pkg, utf8desc, '%%description -l %s', lang, ignored_words) printError(pkg, 'tag-not-utf8', '%description', lang)
for l in utf8desc.splitlines(): description = Pkg.to_unicode(description)
else:
description = Pkg.b2s(description)
self._unexpanded_macros(pkg, '%%description -l %s' % lang, description)
spell_check(pkg, description, '%%description -l %s', lang,
ignored_words)
for l in description.splitlines():
if len(l) > max_line_len: if len(l) > max_line_len:
printError(pkg, 'description-line-too-long', lang, l) printError(pkg, 'description-line-too-long', lang, l)
res = forbidden_words_regex.search(l) res = forbidden_words_regex.search(l)
@ -885,23 +896,24 @@ class TagsCheck(AbstractCheck.AbstractCheck):
res = tag_regex.search(l) res = tag_regex.search(l)
if res: if res:
printWarning(pkg, 'tag-in-description', lang, res.group(1)) printWarning(pkg, 'tag-in-description', lang, res.group(1))
if use_utf8 and not Pkg.is_utf8_str(description):
printError(pkg, 'tag-not-utf8', '%description', lang)
def check_summary(self, pkg, lang, ignored_words): def check_summary(self, pkg, lang, ignored_words):
summary = pkg.langtag(rpm.RPMTAG_SUMMARY, lang) summary = pkg.langtag(rpm.RPMTAG_SUMMARY, lang)
self._unexpanded_macros(pkg, 'Summary(%s)' % lang, summary)
utf8summary = summary
if use_utf8: if use_utf8:
utf8summary = Pkg.to_utf8(summary).decode('utf-8') if not Pkg.is_utf8_bytestr(summary):
spell_check(pkg, utf8summary, 'Summary(%s)', lang, ignored_words) printError(pkg, 'tag-not-utf8', 'Summary', lang)
summary = Pkg.to_unicode(summary)
else:
summary = Pkg.b2s(summary)
self._unexpanded_macros(pkg, 'Summary(%s)' % lang, summary)
spell_check(pkg, summary, 'Summary(%s)', lang, ignored_words)
if '\n' in summary: if '\n' in summary:
printError(pkg, 'summary-on-multiple-lines', lang) printError(pkg, 'summary-on-multiple-lines', lang)
if summary[0] != summary[0].upper(): if summary[0] != summary[0].upper():
printWarning(pkg, 'summary-not-capitalized', lang, summary) printWarning(pkg, 'summary-not-capitalized', lang, summary)
if summary[-1] == '.': if summary[-1] == '.':
printWarning(pkg, 'summary-ended-with-dot', lang, summary) printWarning(pkg, 'summary-ended-with-dot', lang, summary)
if len(utf8summary) > max_line_len: if len(summary) > max_line_len:
printError(pkg, 'summary-too-long', lang, summary) printError(pkg, 'summary-too-long', lang, summary)
if leading_space_regex.search(summary): if leading_space_regex.search(summary):
printError(pkg, 'summary-has-leading-spaces', lang, summary) printError(pkg, 'summary-has-leading-spaces', lang, summary)
@ -916,8 +928,6 @@ class TagsCheck(AbstractCheck.AbstractCheck):
if res: if res:
printWarning(pkg, 'name-repeated-in-summary', lang, printWarning(pkg, 'name-repeated-in-summary', lang,
res.group(1)) res.group(1))
if use_utf8 and not Pkg.is_utf8_str(summary):
printError(pkg, 'tag-not-utf8', 'Summary', lang)
# Create an object to enable the auto registration of the test # Create an object to enable the auto registration of the test