index -summary -history -testscript perl / python |
python off_t_problem.py [directory|files]...
|
#! /usr/bin/env python
import warnings
import os.path
import popen2
import sys
import re
# ---------------------------------------------------------------------------
# helper functions to make up shorthands that wrap inferior python api calls
errors = 0


def warn(msg, error=None):
    """ Emit `msg` as a numbered RuntimeWarning and count it in `errors`.

    msg   -- text of the warning
    error -- optional exception (or any other object); when given, its
             str() is appended on an extra "error was" line
    The stacklevel of 2 attributes the warning to the caller of warn().
    """
    global errors
    errors += 1
    text = "-- "+str(errors)+" --\n "+msg
    if error is not None:
        text = text+"\n error was "+str(error)
    warnings.warn(text, RuntimeWarning, 2)
#fu
def set(dict, name, value, result = None):
    """ used inside lambda expressions to do assignments to a dictionary

    Stores `value` under `name` in `dict`. The stored value is returned,
    unless `result` is given (anything but None): then `result` is returned.
    """
    dict[name] = value
    if result is not None:
        return result
    return value
def sorted(list):
    """ Return a new list with the items of `list` in ascending order.

    Any iterable is accepted (list, tuple, dictionary keys, generator),
    not only objects that support slicing; the argument is never modified.
    """
    newlist = [item for item in list]   # shallow copy, works for any iterable
    newlist.sort()
    return newlist
# for perl chomp(str) use python str.strip() - as whitespaces includes \n
# for perl -f path use python os.path.isfile(path) - or isdir() similarly
def os_path_exists(path):
    """ perl -e path: true if os.access() can see `path` at all (F_OK) """
    exists = os.access(path, os.F_OK)
    return exists
def os_path_readable(path):
    """ perl -r path: true if os.access() grants read permission (R_OK) """
    readable = os.access(path, os.R_OK)
    return readable
def os_path_writable(path):
    """ perl -w path: true if os.access() grants write permission (W_OK) """
    writable = os.access(path, os.W_OK)
    return writable
def os_path_executable(path):
    """ perl -x path: true if os.access() grants execute permission.

    Checks os.X_OK - testing W_OK here would merely repeat
    os_path_writable() and report every writable data file as executable.
    """
    return os.access(path, os.X_OK)
# commands.getoutput mangles stderr/stdout, i.e. it would return Popen4()
def os_read_stdout(command):
    """ Start `command` through popen2.Popen3 and return everything the
    child writes to its stdout, read up to end-of-file. """
    reader = popen2.Popen3(command).fromchild
    return reader.read()
def Q(str):
    """ return `str` wrapped in single quotes (embedded quotes are kept) """
    return "".join(["'", str, "'"])
# and remember, python (before 3.8) has no assignment inside a condition,
# so the perl idiom `while line = file.readline() ....` cannot be written.
# Instead, old versions had to slurp in the entire file as an array, like
# `for line in file.readlines() ....`, or use the extra xreadlines helper
# that was invented just to handle this common case.
# P.S. python 2.3 (2003) has added `for line in file` shorthand for xreadlines,
# and python 2.4 (2005) defines sorted() doing exactly like the def above.
# ---------------------------------------------------------------------------
# beware, stupid python interprets backslashes in replace-parts only partially!
class MatchReplace:
    """ A MatchReplace is a mix of a Python Pattern and a Replace-Template

    `matching` is any object that carries a compiled pattern in its
    `.regex` attribute, `template` is whatever regex.subn() accepts as the
    replacement - a string, or a callable receiving each MatchObject.
    `count` limits the number of substitutions, 0 meaning "replace all".
    Combine it with a string through `&` to get the substituted text.
    """
    def __init__(self, matching, template):
        self.matching = matching
        self.template = template
        self.count = 0
    def _apply(self, string):
        """ return `string` with the template substituted for the matches """
        return self.matching.regex.subn(self.template, string, self.count)[0]
    def __and__(self, string):
        return self._apply(string)
    def __rand__(self, string):
        # reached for `text & replacer`, as str itself defines no `&`
        return self._apply(string)
    def __iand__(self, string):
        # `replacer &= text`: an augmented-assignment hook must return the
        # value to rebind, otherwise the left hand name ends up as None.
        return self._apply(string)
    def __rshift__(self, count):
        self.count = count ; return self
    def __rlshift__(self, count):
        self.count = count ; return self
class Match(str):
    """ A Match is actually a mix of a Python Pattern and MatchObject

    Call the instance with a pattern to (re)compile it, then combine it
    with a string through `&` to run regex.search() on that string. The
    boolean outcome is returned and the MatchObject is kept in `.found`,
    so the groups can be read afterwards by index:
        x = Match()
        if arg & x(r"^-L(.+)"): path = x[1]
    `flags` is a string of inline flag letters (e.g. "mx") that is put
    in front of the pattern as "(?mx)".
    `match >> template` (also `match << template`) makes a MatchReplace.
    """
    def __new__(cls, pattern = None, flags = None):
        # str is immutable, its value is fixed right here. Taking `flags`
        # as well keeps Match(pattern, flags) from failing inside str().
        return str.__new__(cls, pattern or "")
    def __init__(self, pattern = None, flags = None):
        Match.__call__(self, pattern, flags)
    def __call__(self, pattern, flags = None):
        assert isinstance(pattern, str) or pattern is None
        assert isinstance(flags, str) or flags is None
        # no str.__init__(self, pattern) here: it never did anything and
        # newer interpreters reject the extra argument with a TypeError.
        self.found = None # MatchObject
        self.pattern = pattern
        if pattern is not None:
            if flags:
                self.regex = re.compile("(?"+flags+")"+self.pattern)
            else:
                self.regex = re.compile(self.pattern)
        return self
    def __truth__(self):
        return self.found is not None
    def __and__(self, string):
        self.found = self.regex.search(string)
        return self.__truth__()
    def __rand__(self, string):
        # reached for `text & match`, as str itself defines no `&`
        self.found = self.regex.search(string)
        return self.__truth__()
    def __rshift__(self, template):
        return MatchReplace(self, template)
    def __lshift__(self, template):
        # the call sites in this file spell the operator `<<` as well
        return MatchReplace(self, template)
    def __rlshift__(self, template):
        return MatchReplace(self, template)
    def __getitem__(self, index):
        assert self.found is not None
        return self.found.group(index)
# ---------------------------------------------------------------------------
# use as o.optionname to check for commandline options.
class Options:
    """ Attribute-style access to the commandline options.

    Reading an option that was never set yields None instead of raising
    AttributeError, so `if o.name:` works for any option name. All values
    live in the dictionary `var`, which is a class attribute and thereby
    shared by every Options instance.
    """
    var = {}
    def __getattr__(self, name):
        # only called when normal lookup fails - `var` itself is found
        # on the class, so this does not recurse.
        return self.var.get(name)
    def __setattr__(self, name, value):
        self.var[name] = value
#:class
# the one global option set; --symbols is the only option ON by default
o = Options()
o.symbols = 1
# o.files = 1
# o.needed = 1
# o.libpath = 1
# o.detected = 1
# o.symbols = 1
# help text shown for -help / --help (the usage line names this script)
o.help = """python off_t_problem.py [directory|files]...
scans the given files (or all files in a directory) for its
dynamic dependencies. The binary and all its dependencies
are classified whether they have been compiled as largefile
or not - depending on the existance of symbols like plain
fopen()/lseek() or their 64bit cousins from the transitional
largefile-API named fopen64()/lseek64() instead. When two
executable objects have a mismatch then it gets reported!
debug options:
--files after parsing commandline, print the list of files
that will be checked for largefile mismatch
--needed after scanning dynamic imports of the given files
print the (long) list of dependencies recognized
which wil be scanned too for largefile mismatches
--libpath show the libpath that was used to resolve some of
of the dependencies if `ldd` was not available
--symbols print the number of dynamic symbols found in each
object while scanning them (default=ON).
--detected for each object that was scanned, print the
classification attribute -??- -32- -64- or 3264
(along with the dynamic symbols that made this
think it is of that largefile type)
--quiet suppress the list of classifications printed just
or usually before the list of largefile mismatches
--silent ...it does also silence some other hints usually
printed to the screen (--quiet/--no-symbols/--smart)
--smart suppress largefile mismatch for a limited set of
known dependency libs from which only a known set
of algorithm functions is imported (i.e. 'zlib')
--nonclean for libraries that might be checked smart, show
the first symbol that was thought to be offending.
--noncleanall or actually print all the imported symbols from
mismatching libs that are not known to be good. """
def col(wanted, prefix):
    """ move to column - the length of `prefix` is taken as the current
    column, and as many spaces are returned as are needed to reach the
    column `wanted` (none at all when the prefix is that long already) """
    missing = wanted - len(prefix)
    if missing > 0:
        return " " * missing
    return ""
#fu
def col36(prefix):
    """ spaces that pad `prefix` up to column 36, see col() """
    padding = col(36, prefix)
    return padding
def col34(prefix):
    """ spaces that pad `prefix` up to column 34, see col() """
    padding = col(34, prefix)
    return padding
# ----------------------------------------------------------------------
class File:
    """ Record of one object to scan: `name` is its path and `dir` tells
    where it was picked up from (used for debugging output). """
    def __init__(self, name, dir = None):
        self.dir = dir
        self.name = name
#class
# the objects to scan, filled by scan_args - use as X[file]
X = {}
file = ""
# this is the implicit libpath, as if used by ld.so to resolve imports..
L = ["/lib", "/usr/lib", "/usr/local/lib"]
def import_ldso(filename = "/etc/ld.so.conf", _seen = None):
    """ fill the library path

    Appends the directories listed in the ld.so configuration `filename`
    to the global libpath L. Empty lines and `#` comments are skipped, and
    `include <pattern>` lines are followed (patterns are globbed, relative
    ones are taken relative to the directory of the including file) - a
    stock ld.so.conf often consists of nothing but such an include line.
    `_seen` is internal: it records the files already read, so that a
    configuration including itself can not recurse forever.
    A file that can not be opened is reported through warn() and skipped.
    """
    import glob  # only needed here, to expand the include patterns
    if _seen is None:
        _seen = {}
    if filename in _seen:
        return
    _seen[filename] = 1
    try:
        conf = open (filename, "r")
    except IOError:
        warn ("WARNING: import of "+filename+" failed", sys.exc_info()[1])
        return
    try:
        lines = conf.readlines()
    finally:
        conf.close()            # also closed when reading fails midway
    for line in lines:
        entry = line.split("#", 1)[0].strip()
        if not entry:
            continue            # blank line or comment only
        words = entry.split()
        if words[0] == "include" and len(words) > 1:
            for pattern in words[1:]:
                if not os.path.isabs(pattern):
                    pattern = os.path.join(os.path.dirname(filename), pattern)
                included = glob.glob(pattern)
                included.sort()
                for name in included:
                    import_ldso(name, _seen)
            continue
        L.append(entry)
import_ldso()
def scan_args(args):
    """ scan the argument list, options and files and dirs, fill X file-hash

    Recognized, in this order of precedence:
      -help / --help      print o.help
      --name=value        o.name = value
      --no-name           o.name = ""   (switch off)
      --name              o.name = "*"  (switch on)
      -L dir / -Ldir      append dir to the libpath L
    Anything else is taken as a path: a readable file is registered in X
    under its real path, for a directory every readable non-directory
    entry is registered unless `file` calls it a script or a text.
    """
    x = Match()
    old = "" # pushback of $arg
    for arg in args:
        if old == "-L":
            L.append(arg) ; old = "" ; continue
        old = ""
        if arg == "-help" or arg == "--help":
            sys.stdout.write(str(o.help)+"\n") ; continue
        if arg & x(r"^--?(\w[\w-]*)=(.*)"):
            o.var[x[1]] = x[2] ; continue
        if arg & x(r"^--?no-([a-z].*)"):
            o.var[x[1]] = "" ; continue
        if arg & x(r"^--?([a-z].*)"):
            o.var[x[1]] = "*" ; continue
        if arg == "-L":
            old = arg ; continue
        if arg & x(r"^-L(.+)"):
            L.append (x[1]) ; continue
        if arg & x(r"^-[A-Z]"):
            warn("WARNING: illegal option "+arg)
        arg = arg.rstrip("/") # chomp dirsep
        # register the file in the %X hash - .dir says where from (debugging)
        if os.path.isfile(arg):
            if not os.path.isdir(arg) and os_path_readable(arg):
                file = os.path.realpath(arg)
                X[file] = File(file, dir = os.path.dirname(arg))
            continue
        # when a directory was given, we scan all executables in it
        if os.path.isdir(arg):
            for entry in os.listdir(arg):
                name = os.path.realpath(arg+"/"+entry)
                if os.path.isdir(name): continue
                if not os_path_readable(name): continue
                # the path is quoted (names with blanks) and set apart from
                # the redirection that drops the error output of `file`.
                # Q() does not escape a quote character inside the name.
                filetype = os_read_stdout("file "+Q(name)+" 2>/dev/null")
                if filetype & x(r"script") or filetype & x(r"text"):
                    continue
                # the following call will skip symlinks to real files..
                # if not type & x(r"ELF"): # well, warn later on..
                X[name] = File(name, dir = arg)
        #fi
    # od argv
scan_args(sys.argv[1:])
def print_libpath(out = None):
    """ debugging - print @L list (if --libpath seen)

    One "-L <dir>" line per libpath entry is written to `out`, which
    defaults to sys.stderr.
    """
    if out is None: out = sys.stderr
    for file in L:
        out.write("-L "+str(file)+"\n")
if o.libpath: print_libpath()
def print_files(out = None):
    """ debugging - print %X files (if --files seen)

    One "<file> << <dir>" line per registered file, in sorted order, is
    written to `out`, which defaults to sys.stderr.
    """
    if out is None: out = sys.stderr
    for file in sorted(X.keys()):
        out.write(" ".join([str(file), "<<", str(X[file].dir)])+"\n")
if o.files: print_files()
# some options imply other options...
if o.silent:
    o.quiet = 1
    o.smart = 1
    o.symbols = "" # yes, --symbols is ON by default
if o.noncleanall:
    o.nonclean = 1
if o.q:
    o.quiet = 1
# __________________ detect dynamic library imports _________________
def detect_needed(file):
    """ register library imports in $X{$file}{needed}{*}

    `file` is a File record; its `needed` attribute is set to a dictionary
    mapping each imported library name to the path it resolves to ("" if
    it could not be resolved). Unless --noldd was given `ldd` is asked
    first; when that yields nothing, the NEEDED entries from `objdump -p`
    are taken and resolved along the libpath L. A failure of either tool
    is printed and ends the detection for this file.
    """
    file.needed = {}
    # `ldd` prints a nice list of import libs and how they resolve
    header = ""
    try:
        if not o.noldd:
            header = os_read_stdout("ldd "+Q(file.name)+" 2>/dev/null")
            # lines of the form "  libfoo.so.1 => /usr/lib/libfoo.so.1 (0x..)"
            for found in re.finditer(r"(?mx) ^\s+(\S+)\s+[=][>]\s+(\S+)",
                                     header):
                file.needed[found.group(1)] = found.group(2)
            if file.needed: return
    except Exception:
        e = sys.exc_info()[1]
        sys.stdout.write("ERROR ldd "+file.name+":\n"+header+"\n\n"
                         +str(e)+"\n")
        return
    # when there was nothing seen by `ldd` then try again with objdump.
    # however, "objdump -p" shows lib imports but not how they resolve...
    try:
        header = os_read_stdout("objdump -p "+Q(file.name)+" 2>/dev/null")
        for found in re.finditer(r"(?mx) ^\s+NEEDED\s+(\S+)", header):
            file.needed[found.group(1)] = ""
    except Exception:
        e = sys.exc_info()[1]
        sys.stdout.write("ERROR objdump -p "+file.name+":\n"+header+"\n\n"
                         +str(e)+"\n")
        return
    # without ldd, we need to resolve the libimports ourselves
    for lib in file.needed.keys():
        if file.needed[lib]: continue
        for dir in L: # walk -L libpath
            if os.path.isfile(dir+"/"+lib):
                file.needed[lib] = dir+"/"+lib ; break
    return
#fu
# detect the imports of every registered file - with one progress dot
# per file and a closing blank line unless --quiet was given
for filename in sorted(X.keys()):
    if not o.quiet:
        sys.stdout.write(".")
        sys.stdout.flush()
    detect_needed(X[filename])
if not o.quiet:
    sys.stdout.write("\n\n")
def debug_needed():
    """ debugging - print imports if "--needed" was seen

    Writes one "OBJ <file> - <lib> =< '<path>'" line to sys.stderr for
    each import of each registered file, both in sorted order.
    """
    if not o.needed:
        return
    for file in sorted(X.keys()):
        for lib in sorted(X[file].needed.keys()):
            sys.stderr.write(" ".join(["OBJ", file, "-", lib, "=<",
                                       Q(X[file].needed[lib])])+"\n")
debug_needed()
# _____________________ classify each object ___________________________
R = {}; lib = "" # use as R[lib] - it's a cache storing classifications.
# compare with largefile specs at http://ftp.sas.com/standards/large.file
# differences detected by 64on32bits hints, about section 4 of the
# http://ftp.sas.com/standards/large.file/specs/api+.006.ps
# Each name is listed once only - classifyRlib appends a symbol to the
# classification once per list entry, so a duplicate shows up twice.
base64 = [ "creat64", "open64", "ftw64", "nftw64", "fgetpos64",
           "fopen64", "freopen64", "fseeko64", "fsetpos64",
           "ftello64", "tmpfile64", "mmap64", "fstat64",
           "lstat64", "stat64", "statvfs64", "fstatvfs64",
           "lockf64", "lseek64", "ftruncate64", "truncate64",
           "aio_read64", "aio_write64", "lio_listio64", "aio_error64",
           "aio_return64", "aio_cancel64", "aio_suspend64",
           # these have been seen in the wild as well...
           "mkstemp64", "readdir64",
           "pread64", "pwrite64", "sendfile64" ]
def classifyRlib(lib):
    """ this routine is run for all %X files and all their X[file].needed[*]
    dependencies - it stores the information into the %R cache for each
    one. We also check the list of exported/imported symbols along

    R[lib] becomes a SymTable holding the dynamic symbols that
    `objdump -T` lists for `lib`, plus three strings:
      is64        the ..64 largefile symbols that were seen
      is32        their plain cousins that were seen
      import3264  the pairs where the object imports both flavours
    A `lib` starting with "(" is not a path and just gets an empty entry.
    """
    class SymTable:
        """ symbol name -> rest of its objdump line, plus classification """
        def __init__(self):
            self.symlist = {}
            self.is32 = ""
            self.is64 = ""
            self.import3264 = ""
        def sym(self, name):
            # None for an unknown symbol
            return self.symlist.get(name)
        def add(self,name,value):
            self.symlist[name] = value
            return ""
        def symbols(self):
            return self.symlist
    global R
    table = R[lib] = SymTable()
    if lib.startswith("("):
        sys.stdout.write("ignored: "+lib+"\n")
        return
    # read the dynamic symbol table (slow!) and register in $R{$lib}{sym}{*}
    # Q() keeps paths with blanks in one piece (but escapes no quote char)
    dynamicsymbols = os_read_stdout("objdump -T "+Q(lib))
    for found in re.finditer(r"(?mx) ^ (.*) \s+ ([\w_]\w+) \s*$",
                             dynamicsymbols):
        table.add(found.group(2), found.group(1))
    if o.symbols and table.symbols():
        sys.stderr.write(" ".join(["symbols:", lib, col36(lib),
                                   str(len(table.symbols()))])+"\n")
    def imported(str):
        # objdump marks an undefined (that is: imported) symbol with *UND*
        return str.find("*UND*") >= 0
    suffix64 = re.compile(r"64$")
    for sym in base64: # foreach known ..64 symbol from the largefile-API
        sym = suffix64.sub("", sym)
        if table.sym(sym+"32"): continue
        if table.sym(sym+"64"): table.is64 += " "+sym+"64"
        if table.sym(sym): table.is32 += " "+sym+".."
        if table.sym(sym) and table.sym(sym+"64"):
            if imported(table.sym(sym)) and imported(table.sym(sym+"64")):
                table.import3264 += " "+sym+"64/"+sym
    #od
    if table.is32: return
    # secondly - if the library/binary is itself _64 and does also export
    # functions in traditional dualmode-style (none/none64) then declare
    # them _32 as well - effectivly classifying it as a 3264 dualmode object
    dualmode = re.compile(r"\w[\w_]+\w\w64$")
    inttype = re.compile(r"(_int|Int)64$")
    for sym in table.symbols().keys():
        if not dualmode.search(sym) or inttype.search(sym):
            continue # for each symbol like "\w+64"
        sym = suffix64.sub("", sym) # which exports a cousin symbol
        if not table.sym(sym): continue # without the "64" suffix....
        if imported(table.sym(sym)): continue
        number = -1 # sanity check: there is no other symbol
        for num in range(0,1024): # with a number suffix, esp. no sym+"32"
            if num == 64: continue # or sym+"65" but we test all up to 1024
            if table.sym(sym+"%i"%num):
                number = num ; break # --> continue outer loop
        if number < 0:
            # okay, this $lib looks like exporting 3264 dualmode symbols..
            if not len(table.is32): table.is32 = " "*len(table.is64)
            if table.sym(sym+"64"): table.is64 += " "+sym+"64"
            if table.sym(sym): table.is32 += " "+sym+".."
    #od
#fu
# the function above was defined as "fu", now let's walk all the binaries
# and imported libraries, and classify whether they are _32 or _64 (or both)
# every object is scanned once only - `objdump -T` is slow, and a file
# may have been classified before as the dependency of another file.
for file in X.keys():
    if file not in R:
        classifyRlib (file)
    for importlib in X[file].needed.keys():
        lib = X[file].needed[importlib]
        if lib in R: continue # already classified
        classifyRlib (lib)
#od
if o.symbols:
    sys.stderr.write("\n\n") # (done with scanning/reading object files)
# .........................................................................
def printRlib(lib, out):
    """ helper: print the classifyRlib result of a given Rlib to STDOUT

    Writes "imports: <lib> <code> <symbols>" to `out` (sys.stdout when
    `out` is None), padded behind <lib> by col36(). The code is -32-,
    -64- or -??-; an object with both kinds of symbols gets two lines,
    32++ with the plain symbols and ++64 with the ..64 symbols.
    """
    if out is None: out = sys.stdout
    def show(code, symbols = None):
        items = ["imports:", lib, col36(lib), code]
        if symbols is not None:
            items.append(symbols.lstrip())
        out.write(" ".join(items)+"\n")
    entry = R[lib]
    if entry.is32 and entry.is64:
        show("32++", entry.is32)
        show("++64", entry.is64)
    elif entry.is32:
        show("-32-", entry.is32)
    elif entry.is64:
        show("-64-", entry.is64)
    else:
        show("-??-")
#fu
def Rtyp(lib):
    """ helper - subset of printRlib, only the 4char classify-code of
    R[lib] is returned: "3264", "-32-", "-64-" or "-??-" """
    entry = R[lib]
    if entry.is32 and entry.is64:
        return "3264"
    if entry.is32:
        return "-32-"
    if entry.is64:
        return "-64-"
    return "-??-"
#fu
def debug_detected():
    """ print the classifyRlib results if "--detected" was seen - they go
    to sys.stderr, for every object in R except the C library itself """
    if not o.detected:
        return
    libc = Match(r".*/libc[.]so[.]\d+$")
    for lib in sorted(R.keys()):
        if not (lib & libc):
            printRlib (lib, sys.stderr)
debug_detected()
# _______________________ smart helper function _____________________
# some dependencies should not provoke a mismatch even that the
# libraries themselves do mismatch in their largefile mode - that is
# the case when only algorithm functions are imported that would not
# trigger access to any filedescriptor - `zlib` is a good example.
#
# implementation: for a known set of dependent libraries, we can check
# which symbols have been imported from it. We know about those imports
# of algorithms that are acceptable. If only these were seen, then the
# import dependency turns out to be notoffending, i.e. it is "(clean)".
# library name -> patterns of the symbols that are harmless to import from
# it; each pattern has to match a symbol name as a whole. The entry ".."
# is the fallback used for every library that is not listed here.
goodimports = {
    "libz" : [
        r"deflate\w*",
        r"inflate\w*",
        r"compress\w*",
        r"uncompress\w*",
        r"\w+32",
        r"zError",
        r"zlibVersion",
    ],
    # only file-reference: poptReadConfigFile(...,name)
    "libpopt" : [ r"popt[A-Z](?:\w(?!File))*" ],
    "libutil" : [
        r"(open|fork)pty",
        r"log(in|out|wtmp|in_tty)",
    ],
    "libdv" : [ r"\w*" ], # only encode/decode memory buffers
    "libpam" : [ r"\w*" ], # only memory buffer checking
    "libnsl" : [ r"\w*" ], # only NIS registry nonfs readwrite
    "libhistory" : [ r"\w*" ], # a.k.a. readline
    "libreadline" : [ r"readline", "add_history" ],
    "libXpm" : [ r"XpmCreatePixmapFromData" ],
    "libssl" : [ r"SSL_\w*" ],
    "libfreetype" : [ r"\w*" ],
    "libXt" : [ r"Xt(\w(?!Input))*" ],
    "libXm" : [ r"_?Xm\w*" ],
    "libldap" : [
        r"ldap_domain2hostlist",
        r"ldap_err2string",
    ],
    ".." : [ "<<" ],
}
def notoffending(bin,lib):
    """ smart check of a 64-on-32 dependency: return 1 when `bin` uses
    only known good symbols of `lib`, 0 otherwise.

    Only applies when `bin` has ..64 symbols and `lib` has plain ones (0
    is returned for any other combination). Every symbol that `lib`
    exports and that `bin` has in its symbol table as well must match one
    of the goodimports patterns of that library - the ".." entry is used
    for a library that is not listed. With --nonclean the first offending
    symbol is printed, with --noncleanall all of them.
    """
    if not R[bin].is64 or not R[lib].is32: return 0
    library = ""
    x = Match()
    for known in goodimports.keys():
        if known == "..": continue # the fallback entry, not a library name
        # the name must be the basename up to its ".so" suffix - no stray
        # format placeholder in here, it would never let anything match.
        if "/"+lib & x(r"/"+re.escape(known)+r"[.]so\b[^/]*$"):
            library = known; break
    # if not library and not o.nonclean: return 0
    if not library: library = ".."
    offending = ""
    for sym in R[lib].symbols().keys():
        if (R[lib].sym(sym) & x(r"[*]UND[*]") or # $lib imports(!!) it.
            sym & x(r"^_\w+_*") or # compiler symbols / hidden symbols
            sym & x(r"^\d") or # hmmm, does exist sometimes
            sym & x(r"^[A-Z_]+[.]\w+") or # a dot in the middle, "GLIBC_2.1"
            sym & x(r"^\s*$") or # empty, some extra info line
            not R[bin].sym(sym) ):
            continue
        # the symbol is exported(!!) by $lib and it exists in $bin....
        for known in goodimports[library]:
            if sym & x(r"^"+known+"$"): # if it's a known goodimports symbol
                break # then it is clean - it's not offending.
        else: # otherwise, we have an offending symbol.
            offending += '"'+sym+'" '
            if not o.noncleanall: break
    #od
    if not offending: return 1 # imports only known good symbols.
    if library == ".." : library = lib
    # one output line, or two of them with --noncleanall
    out = sys.stdout
    if o.nonclean:
        out.write("nonclean:"+bin+" "+col36(bin)+" (64-<<-32) "+library)
    if o.noncleanall:
        if o.nonclean: out.write(" ")
        out.write("(not clean?)\n")
    if o.nonclean:
        if not o.noncleanall: out.write(" ")
        out.write(offending+"\n")
    return 0 # found symbols not in the goodlist, return FALSE.
#fu
# ___________________ show largefile-mode mismatches __________________
# we walk the %X{file}s twice - we check out all the largefile mismatches
# and register them in the %offending hash. When done, then we print the
# Rlib classification of these, so that the reader can have an eyeball
# check if that is actually done right. Finally, go over the list for
# real and print the largefile mismatches - as an extension some of the
# largefile-mismatches are marked "(clean)" when the `notoffending`-helper
# functions knows that the $bin file does not import any symbol from its
# dependency $lib that could trigger some file access. So, even that there
# is a mismatch, it does not matter for there will be no non-largefile-mode
# access to the filesystem effectivly. using "--smart" or "--silent" will
# suppress these lines completely from output to the user screen.
offending = {}
def mismatch(file, lib):
    """ return 1 if `file` and its dependency `lib` disagree in their
    largefile mode, else 0 - both are looked up in the R cache """
    # okay: -64-<<-64- 3264<<-64- 3264<<3264 and -32-<<-32- -32-<<3264
    # else: mismatch: 3264<<-32- -64-<<-32- and -32-<<-64-
    user = R[file]
    used = R[lib]
    if not (user.is32 or user.is64):
        return 0 # the file is unclassified
    if not (used.is32 or used.is64):
        return 0 # the library is unclassified
    if user.is64 and used.is64:
        return 0
    if not user.is64 and user.is32 and used.is32:
        return 0
    return 1
def compute_mismatches():
    """ Collect the objects that take part in a largefile mismatch.

    Returns a dictionary with a "!" entry for each file and library of a
    mismatching dependency (left out with --smart if the dependency is
    notoffending) and for each file that has an import3264 entry.
    """
    flagged = {}
    for file in X.keys(): # register the largefile mismatches
        for lib in X[file].needed.values():
            if mismatch(file, lib) and not (o.smart
                                            and notoffending (file, lib)):
                # register both, so that we'll see the
                # Rlib classification of both of them.
                flagged[lib] = "!"
                flagged[file] = "!"
    for file in X.keys():
        if R[file].import3264:
            flagged[file] = "!"
    return flagged
offending = compute_mismatches()
def printRlib_forall(libs, out = None):
    """ and here we print the Rlib classification - printRlib is called
    for each item of `libs`, the number of items printed is returned """
    count = 0
    for name in libs:
        printRlib (name, out)
        count = count + 1
    return count
# the number of offending objects decides upon the exit - it has to be
# known with "--quiet" too, where the classifications are not printed.
mismatches = len(offending)
if not o.quiet: # unless however "--quiet" or "--silent" seen.
    printRlib_forall( sorted(offending.keys()) )
if not mismatches and not o.silent:
    sys.stdout.write("no largefile mismatch found :-)\n\n")
if not mismatches:
    sys.exit(0) # note: the last line of this script reads "exit 1" :-)
#fi
have_weirdos = []
def print_offending_import3264(out = None):
    """ here we show all the miscompiled libraries

    For each offending object that imports both flavours of a largefile
    symbol a "weirdos:" line is written to `out` (default sys.stdout) and
    its basename is appended to have_weirdos. Returns len(have_weirdos).
    """
    global have_weirdos
    if out is None: out = sys.stdout
    for lib in sorted(offending.keys()):
        if R[lib].import3264:
            out.write(" ".join(["weirdos:", lib, col34(lib), "IMPORTS",
                                R[lib].import3264.lstrip()])+"\n")
            have_weirdos.append(os.path.basename(lib))
    return len(have_weirdos)
if not o.quiet:
    if print_offending_import3264():
        sys.stdout.write("WARNING: importing both 32bit and 64bit off_t"
                         " symbols is very very dangerous!\n")
have_badlinks = 0
have_cleanlinks = 0
def print_mismatches():
    """ Print one line per largefile mismatch and count them.

    A mismatch that is notoffending is printed as "badlink: ... (clean)"
    and counted in have_cleanlinks too - or left out completely with
    --smart. Any other one is printed as "Badlink:". Both kinds count
    in have_badlinks.
    """
    global have_badlinks, have_cleanlinks
    for file in sorted(X.keys()): # now show the largefile mismatches
        for importlib in sorted(X[file].needed.keys()):
            lib = X[file].needed[importlib]
            if not mismatch(file,lib): continue
            # ask once only - notoffending prints the --nonclean hints
            # itself, a second call would show them twice.
            clean = notoffending (file, lib)
            if o.smart and clean: continue
            have_badlinks += 1
            items = [file, col36(file), Rtyp(file)+"<<"+Rtyp(lib), lib]
            if clean:
                have_cleanlinks += 1
                items = ["badlink:"] + items + ["(clean)"]
            else:
                items = ["Badlink:"] + items
            sys.stdout.write(" ".join(items)+"\n")
    #od
print_mismatches()
def print_summary():
    """ Print the closing "summary:" lines.

    Reports the number of weirdos (with --nonclean their names as well,
    wrapped into lines of about 70 characters), then the number of
    badlinks and how many of these are clean.
    """
    global have_badlinks, have_cleanlinks, have_weirdos
    out = sys.stdout
    if len(have_weirdos):
        out.write("summary: found "+str(len(have_weirdos))
                  +"  weirdos - too dangerous to use them:"
                  +" (file bug report!)\n")
        if o.nonclean:
            line = ""
            for item in have_weirdos:
                # flush the line when the next name would make it too long
                # (an empty line is never flushed)
                if line and len (line+" "+item) > 70:
                    out.write("summary: ("+line[:-1]+")\n")
                    line = ""
                line += item+" "
            if line: out.write("summary: ("+line[:-1]+")\n")
    out.write("summary: found "+str(have_badlinks)
              +" badlinks to be checked closer ( "+str(have_cleanlinks)
              +" are clean)\n")
    if have_badlinks and not o.nonclean:
        out.write("summary: check symbols with --nonclean"
                  " or even --noncleanall\n")
print_summary()
sys.exit(1) # there were some offending imports, or so it seems....