download 'off_t_problem.py' here! | free as MPL 1.0 |
python off_t_problem.py [directory|files]...
# check files for being problematic,
or just all files in a given directory
# use --silent when running it non-interactively.
#! /usr/bin/env python
import warnings
import os.path
import subprocess
import sys
import re

try:
    # Python 2 only (the module was removed in Python 3).  Nothing below
    # needs it any more; the import is kept for code that may still use it.
    import popen2
except ImportError:
    popen2 = None

# ---------------------------------------------------------------------------
# helper functions to make up shorthands that wrap python api calls

errors = 0  # number of warnings issued so far, see warn()


def warn(msg, error=None):
    """Issue a numbered RuntimeWarning and count it in the global `errors`.

    msg   -- the text of the warning
    error -- optional exception (or any object) appended as "error was ..."

    The warning is attributed to the caller of warn() (stacklevel 2).
    """
    global errors
    errors += 1
    text = "-- "+str(errors)+" --\n "+msg
    if error is not None:
        text += "\n error was "+str(error)
    warnings.warn(text, RuntimeWarning, 2)


def set(dict, name, value, result=None):
    """Assign dict[name] = value from inside an expression (e.g. a lambda).

    Returns `result` when one is given, otherwise the assigned value.
    NOTE: deliberately shadows the builtin `set` - the rest of the script
    calls this helper under that name.
    """
    dict[name] = value
    if result is None:
        return value
    return result


def sorted(list):
    """Return a new, sorted list holding the items of `list`.

    Any iterable is accepted (plain lists as well as dictionary key views);
    the argument itself is never modified.
    """
    newlist = [item for item in list]
    newlist.sort()
    return newlist


# for perl chomp(str) use python str.strip() - as whitespaces includes \n
# for perl -f path use python os.path.isfile(path) - or isdir() similarly
def os_path_exists(path):
    """True if `path` exists (perl -e)."""
    return os.access(path, os.F_OK)


def os_path_readable(path):
    """True if `path` may be read by the current user (perl -r)."""
    return os.access(path, os.R_OK)


def os_path_writable(path):
    """True if `path` may be written by the current user (perl -w)."""
    return os.access(path, os.W_OK)


def os_path_executable(path):
    """True if `path` may be executed by the current user (perl -x)."""
    # the execute bit is X_OK - this used to test W_OK by mistake
    return os.access(path, os.X_OK)


# commands.getoutput mangles stderr/stdout, i.e. it would return Popen4()
def os_read_stdout(command):
    """Run `command` through the shell and return what it wrote to stdout.

    stderr is not captured (it stays connected to our own stderr unless the
    command line redirects it).  The child is waited for, so no zombie
    process is left behind.
    """
    child = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE,
                             universal_newlines=True)
    return child.communicate()[0]


def Q(str):
    """Wrap `str` in single quotes - for display only, nothing is escaped."""
    return "'"+str+"'"

# Historical note: python has no `while line = file.readline()` loop.
# Early versions needed `for line in file.readlines()` (slurping the whole
# file) or the xreadlines helper classes.
# P.S. python 2.3 (2003) has added `for line in file` shorthand for xreadlines,
# and python 2.4 (2005) defines sorted() doing exactly like the def above.
# ---------------------------------------------------------------------------
# beware, python interprets backslashes in replace-parts only partially!

class MatchReplace:
    """ A MatchReplace is a mix of a Python Pattern and a Replace-Template

    It is created with ``Match(pattern) >> template`` and applied to a
    string with ``string & matchreplace``, which returns the string with
    every match (or at most `count` matches) replaced by the template.
    The template may be a string or a callable, just as for re.sub().
    """

    def __init__(self, matching, template):
        self.matching = matching    # the Match object holding the regex
        self.template = template    # replacement string or callable
        self.count = 0              # 0 means "replace all occurrences"

    def _substitute(self, string):
        """ run the substitution and return only the resulting string """
        return self.matching.regex.subn(self.template, string, self.count)[0]

    def __and__(self, string):
        return self._substitute(string)

    def __rand__(self, string):
        return self._substitute(string)

    def __iand__(self, string):
        # an augmented assignment rebinds its target to the returned value;
        # returning nothing (as before) would have left None behind.
        return self._substitute(string)

    def __rshift__(self, count):
        """ limit the number of replacements: ``(match >> tmpl) >> count`` """
        self.count = count
        return self

    # both shift directions are accepted, as operand on either side.
    # (__rlshift__ is kept although python itself never calls that name)
    __lshift__ = __rshift__
    __rrshift__ = __rshift__
    __rlshift__ = __rshift__


class Match(str):
    """ A Match is actually a mix of a Python Pattern and MatchObject

    ``string & match`` searches the string and returns True/False, after
    which ``match[n]`` gives group n of that search.  Calling the object,
    ``match(pattern)``, loads a new pattern and returns the object, so a
    single instance can be reused: ``if arg & x(r"..."): use(x[1])``.
    """

    def __new__(cls, pattern = None, flags = None):
        # str is immutable, its value has to be settled here; the flags must
        # not reach str() which would take them for an encoding.
        if pattern is None:
            pattern = ""
        return str.__new__(cls, pattern)

    def __init__(self, pattern = None, flags = None):
        Match.__call__(self, pattern, flags)

    def __call__(self, pattern, flags = None):
        """ (re)load the pattern; `flags` are inline letters like "mx" """
        assert isinstance(pattern, str) or pattern is None
        assert isinstance(flags, str) or flags is None
        self.found = None # MatchObject
        self.pattern = pattern
        if pattern is not None:
            if flags:
                self.regex = re.compile("(?"+flags+")"+self.pattern)
            else:
                self.regex = re.compile(self.pattern)
        return self

    def __truth__(self):
        """ whether the last search was successful """
        return self.found is not None

    def __and__(self, string):
        self.found = self.regex.search(string)
        return self.__truth__()

    def __rand__(self, string):
        self.found = self.regex.search(string)
        return self.__truth__()

    def __rshift__(self, template):
        """ ``match >> template`` builds the MatchReplace for a substitution """
        return MatchReplace(self, template)

    # both shift directions are accepted, as operand on either side.
    # (__rlshift__ is kept although python itself never calls that name)
    __lshift__ = __rshift__
    __rrshift__ = __rshift__
    __rlshift__ = __rshift__

    def __getitem__(self, index):
        """ group `index` of the last successful search """
        assert self.found is not None
        return self.found.group(index)

# ---------------------------------------------------------------------------
# use as o.optionname to check for commandline options.
class Options:
    """ Attribute-style access to the command line options.

    All values live in the class-level `var` dictionary, so `o.name` and
    `o.var["name"]` are interchangeable. Reading an option that was never
    set gives None instead of raising AttributeError.
    """

    var = {}   # option name -> value

    def __getattr__(self, name):
        # only reached when the normal lookup fails, i.e. never for `var`
        return self.var.get(name)

    def __setattr__(self, name, value):
        self.var[name] = value
#:class

o = Options()
o.symbols = 1
# debugging switches that may be hardwired here:
# o.files = 1
# o.needed = 1
# o.libpath = 1
# o.detected = 1
# o.symbols = 1

# NOTE: the line layout of this text was lost in the copy this file was
# restored from - the wording is original, the line breaks are not.
o.help = """python off_t_problem.py [directory|files]...
  scans the given files (or all files in a directory) for its dynamic
  dependencies. The binary and all its dependencies are classified
  whether they have been compiled as largefile or not - depending on
  the existance of symbols like plain fopen()/lseek() or their 64bit
  cousins from the transitional largefile-API named fopen64()/lseek64()
  instead. When two executable objects have a mismatch then it gets
  reported!
 debug options:
  --files     after parsing commandline, print the list of files that
              will be checked for largefile mismatch
  --needed    after scanning dynamic imports of the given files print
              the (long) list of dependencies recognized which wil be
              scanned too for largefile mismatches
  --libpath   show the libpath that was used to resolve some of of the
              dependencies if `ldd` was not available
  --symbols   print the number of dynamic symbols found in each object
              while scanning them (default=ON).
  --detected  for each object that was scanned, print the classification
              attribute -??- -32- -64- or 3264 (along with the dynamic
              symbols that made this think it is of that largefile type)
  --quiet     suppress the list of classifications printed just or usually
              before the list of largefile mismatches
  --silent    ...it does also silence some other hints usually printed to
              the screen (--quiet/--no-symbols/--smart)
  --smart     suppress largefile mismatch for a limited set of known
              dependency libs from which only a known set of algorithm
              functions is imported (i.e. 'zlib')
  --nonclean  for libraries that might be checked smart, show the first
              symbol that was thought to be offending.
  --noncleanall or actually print all the imported symbols from mismatching
              libs that are not known to be good.
"""

def _say(out, *items):
    """ write the items to `out` separated by blanks, ending the line -
        that is what a python2 `print >> out, a, b` statement does, in a
        spelling that also runs on python3. """
    out.write(" ".join([str(item) for item in items]) + "\n")

def _shquote(path):
    """ quote a file name for the shell so that blanks and metacharacters
        in it can not break (or extend) the command line built around it """
    return "'" + path.replace("'", "'\\''") + "'"

def col(wanted, prefix):
    """ move to column - the length of the input string is taken as the
        current column and as many spaces are returned as are needed to
        reach the `wanted` column (none if we are already beyond it) """
    column = len(prefix)
    if wanted <= column:
        return ""
    return " " * (wanted - column)
#fu
def col36(prefix):
    """ spaces that pad `prefix` up to column 36 """
    return col(36, prefix)
def col34(prefix):
    """ spaces that pad `prefix` up to column 34 """
    return col(34, prefix)

# ----------------------------------------------------------------------
class File:
    """ an object file to be checked - `dir` says where it was found
        (debugging), detect_needed() adds a `needed` dictionary later. """
    def __init__(self, name, dir = None):
        self.name = name
        self.dir = dir
#class

X = {}; file = ""; # use as X[file]

# this is the implicit libpath, as if used by ld.so to resolve imports..
L = [ "/lib", "/usr/lib", "/usr/local/lib" ]

def import_ldso():
    """ fill the library path from /etc/ld.so.conf - a missing or
        unreadable file only gives a warning. """
    filename = "/etc/ld.so.conf"
    try:
        conf = open(filename, "r")
    except IOError as error:
        warn("WARNING: import of "+filename+" failed", error)
        return
    try:
        for line in conf:
            entry = line.strip()
            # blank lines and comments do not name a directory
            if entry and not entry.startswith("#"):
                L.append(entry)
    finally:
        conf.close()

import_ldso()

def scan_args(args):
    """ scan the argument list, options and files and dirs, fill X file-hash

    --name=value, --no-name and --name are stored into the options `o`
    as value, "" and "*"; -L dir (or -Ldir) extends the libpath L. A file
    is registered in X directly, of a directory every readable entry is
    registered that `file` does not report as a script or text.
    """
    x = Match()
    old = ""   # pushback of $arg
    for arg in args:
        if old == "-L":
            L.append(arg)
            old = ""
            continue
        old = ""
        if arg == "-help" or arg == "--help":
            _say(sys.stdout, o.help)
            continue
        if arg & x(r"^--?(\w[\w-]*)=(.*)"):
            o.var[x[1]] = x[2]
            continue
        if arg & x(r"^--?no-([a-z].*)"):
            o.var[x[1]] = ""
            continue
        if arg & x(r"^--?([a-z].*)"):
            o.var[x[1]] = "*"
            continue
        if arg == "-L":
            old = arg
            continue
        if arg & x(r"^-L(.+)"):
            L.append(x[1])
            continue
        if arg & x(r"^-[A-Z]"):
            warn("WARNING: illegal option "+arg)
        arg = arg.rstrip("/") # chomp dirsep
        # register the file in the %X hash - .dir says where from (debugging)
        if os.path.isfile(arg):
            if not os.path.isdir(arg) and os_path_readable(arg):
                file = os.path.realpath(arg)
                X[file] = File(file, dir = os.path.dirname(arg))
            continue
        # when a directory was given, we scan all executables in it
        if os.path.isdir(arg):
            for entry in os.listdir(arg):
                name = os.path.realpath(arg+"/"+entry)
                if os.path.isdir(name): continue
                if not os_path_readable(name): continue
                # (the blank before the redirection was missing, so that
                #  `file` has been asked about a file named "<name>2")
                filetype = os_read_stdout(
                    "file "+_shquote(name)+" 2>/dev/null")
                if filetype & x(r"script") or filetype & x(r"text"):
                    continue
                # the following call will skip symlinks to real files..
                # if not type & x(r"ELF"):  # well, warn later on..
                X[name] = File(name, dir = arg)
        #fi
    # od argv

scan_args(sys.argv[1:])

def print_libpath(out = None):
    """ debugging - print @L list (if --libpath seen) """
    if out is None: out = sys.stderr
    for dirname in L:
        _say(out, "-L", dirname)

if o.libpath: print_libpath()

def print_files(out = None):
    """ debugging - print %X files (if --files seen) """
    if out is None: out = sys.stderr
    for name in sorted(list(X)):
        _say(out, name, "<<", X[name].dir)

if o.files: print_files()

# some options imply other options...
if o.silent: o.quiet = 1
if o.silent: o.smart = 1
if o.silent: o.symbols = ""  # yes, --symbols is ON by default
if o.noncleanall: o.nonclean = 1
if o.q: o.quiet = 1

# __________________ detect dynamic library imports _________________
# a line of `ldd` output looks like "   libz.so.1 => /lib/libz.so.1 (0x..)"
_LDD_LINE = re.compile(r"(?mx) ^\s+(\S+)\s+[=][>]\s+(\S+)")
# a line of `objdump -p` output looks like "  NEEDED      libz.so.1"
_NEEDED_LINE = re.compile(r"(?mx) ^\s+NEEDED\s+(\S+)")

def detect_needed(file):
    """ register library imports in $X{$file}{needed}{*}

    file.needed maps the name of each imported library to the path that
    it resolves to - the path stays "" if it could not be resolved.
    """
    file.needed = {}
    # `ldd` prints a nice list of import libs and how they resolve
    header = ""
    try:
        if not o.noldd:
            header = os_read_stdout(
                "ldd "+_shquote(file.name)+" 2>/dev/null")
            for found in _LDD_LINE.finditer(header):
                file.needed[found.group(1)] = found.group(2)
            if file.needed: return
    except Exception as e:
        sys.stdout.write(
            "ERROR ldd "+file.name+":\n"+header+"\n\n"+str(e)+"\n")
        return
    # when there was nothing seen by `ldd` then try again with objdump.
    # however, "objdump -p" shows lib imports but not how they resolve...
    try:
        header = os_read_stdout(
            "objdump -p "+_shquote(file.name)+" 2>/dev/null")
        for found in _NEEDED_LINE.finditer(header):
            file.needed[found.group(1)] = ""
    except Exception as e:
        sys.stdout.write(
            "ERROR objdump -p "+file.name+":\n"+header+"\n\n"+str(e)+"\n")
        return
    # without ldd, we need to resolve the libimports ourselves
    for lib in file.needed.keys():
        if file.needed[lib]: continue
        for dirname in L: # walk -L libpath
            if os.path.isfile(dirname+"/"+lib):
                file.needed[lib] = dirname+"/"+lib
                break
    return
#fu

for filename in sorted(list(X)):
    if not o.quiet:
        sys.stdout.write(".") ; sys.stdout.flush()   # progress indicator
    detect_needed(X[filename])
if not o.quiet: _say(sys.stdout, "\n")

def debug_needed():
    """ debugging - print imports if "--needed" was seen """
    if o.needed:
        for file in sorted(list(X)):
            for lib in sorted(list(X[file].needed)):
                _say(sys.stderr, "OBJ", file, "-",
                     lib, "=>", Q(X[file].needed[lib]))
debug_needed()

# _____________________ classify each object ___________________________
R = {}; lib = "" # use as R[lib] - it's a cache storing classifications.

# compare with largefile specs at http://ftp.sas.com/standards/large.file
# differences detected by 64on32bits hints, about section 4 of the
# http://ftp.sas.com/standards/large.file/specs/api+.006.ps
base64 = [
    "creat64", "open64", "ftw64", "nftw64", "fgetpos64", "fopen64",
    "freopen64", "fseeko64", "fsetpos64", "ftello64", "tmpfile64",
    "mmap64", "fstat64", "lstat64", "stat64", "statvfs64", "fstatvfs64",
    "lockf64", "lseek64", "ftruncate64", "truncate64",
    "aio_read64", "aio_write64", "lio_listio64", "aio_error64",
    "aio_return64", "aio_cancel64", "aio_suspend64",
    # these have been seen in the wild as well...
    "mkstemp64", "readdir64", "pread64", "pwrite64", "sendfile64" ]

class _SymTable:
    """ the classification of one object as it is stored in the R cache """
    def __init__(self):
        self.symlist = {}      # symbol name -> rest of its `objdump -T` line
        self.is32 = ""         # " name.." for each plain largefile symbol
        self.is64 = ""         # " name64" for each ..64 largefile symbol
        self.import3264 = ""   # " name64/name" pairs that are both imported
    def sym(self, name):
        """ the objdump info of the symbol - or None if it is unknown """
        return self.symlist.get(name)
    def add(self, name, value):
        self.symlist[name] = value
        return ""
    def symbols(self):
        return self.symlist

# a line of `objdump -T` output ends in the symbol name (two chars or more)
_DYNSYM_LINE = re.compile(r"(?mx) ^ (.*) \s+ ([\w_]\w+) \s*$")

def classifyRlib(lib):
    """ this routine is run for all %X files and all their
    X[file].needed[*] dependencies - it stores the information
    into the %R cache for each one. We also check the list of
    exported/imported symbols along """
    table = R[lib] = _SymTable()
    if not lib:
        # an import that did not resolve to a path - without a file name
        # `objdump` would silently go for "a.out" in the current directory
        return
    if lib.startswith("("):
        _say(sys.stdout, "ignored:", lib)
        return
    # read the dynamic symbol table (slow!) and register in $R{$lib}{sym}{*}
    dynamicsymbols = os_read_stdout("objdump -T "+_shquote(lib))
    for found in _DYNSYM_LINE.finditer(dynamicsymbols):
        table.add(found.group(2), found.group(1))
    if o.symbols and table.symbols():
        _say(sys.stderr, "symbols:", lib,
             col36(lib), len(table.symbols()))

    def imported(info):
        # undefined symbols are the ones that get imported from elsewhere
        return info.find("*UND*") >= 0

    for name in base64:   # foreach known ..64 symbol from the largefile-API
        sym = re.sub(r"64$", "", name)
        if table.sym(sym+"32"): continue
        if table.sym(sym+"64"):
            table.is64 += " "+sym+"64"
        if table.sym(sym):
            table.is32 += " "+sym+".."
        if table.sym(sym) and table.sym(sym+"64"):
            if imported(table.sym(sym)) and imported(table.sym(sym+"64")):
                table.import3264 += " "+sym+"64/"+sym
    #od
    if table.is32: return
    # secondly - if the library/binary is itself _64 and does also export
    # functions in traditional dualmode-style (none/none64) then declare
    # them _32 as well - effectivly classifying it as a 3264 dualmode object
    x = Match()
    for name in table.symbols().keys():
        if not name & x(r"\w[\w_]+\w\w64$") or name & x(r"(_int|Int)64$"):
            continue                         # for each symbol like "\w+64"
        sym = re.sub(r"64$", "", name)       # which exports a cousin symbol
        if not table.sym(sym): continue      # without the "64" suffix....
        if imported(table.sym(sym)): continue
        # sanity check: there is no other symbol with a number suffix, esp.
        # no sym+"32" or sym+"65" but we test all up to 1024
        numbered = False
        for num in range(0, 1024):
            if num == 64: continue
            if table.sym(sym+"%i"%num):
                numbered = True
                break
        if numbered: continue
        # okay, this $lib looks like exporting 3264 dualmode symbols..
        if not len(table.is32):
            table.is32 = " "*len(table.is64)
        if table.sym(sym+"64"):
            table.is64 += " "+sym+"64"
        if table.sym(sym):
            table.is32 += " "+sym+".."
    #od
#fu

# the function above was defined as "fu", now let's walk all the binaries
# and imported libraries, and classify whether they are _32 or _64 (or both)
for file in X.keys():
    classifyRlib (file)
    for importlib in X[file].needed.keys():
        lib = X[file].needed[importlib]
        if lib in R: continue # already classified
        classifyRlib (lib)
#od
if o.symbols: _say(sys.stderr, "\n")
# (done with scanning/reading object files)

# .........................................................................
def printRlib(lib, out = None):
    """ helper: print the classifyRlib result of a given Rlib to STDOUT
        (or to the `out` stream if one is given) """
    if out is None: out = sys.stdout
    if R[lib].is32:
        if R[lib].is64:
            _say(out, "imports:", lib, col36(lib), "32++",
                 R[lib].is32.lstrip())
            _say(out, "imports:", lib, col36(lib), "++64",
                 R[lib].is64.lstrip())
        else:
            _say(out, "imports:", lib, col36(lib), "-32-",
                 R[lib].is32.lstrip())
    else:
        if R[lib].is64:
            _say(out, "imports:", lib, col36(lib), "-64-",
                 R[lib].is64.lstrip())
        else:
            _say(out, "imports:", lib, col36(lib), "-??-")
#fu

def Rtyp(lib):
    """ helper - subset of above, only 4char classify-code is returned """
    if R[lib].is32:
        if R[lib].is64: return "3264"
        return "-32-"
    if R[lib].is64: return "-64-"
    return "-??-"
#fu

def debug_detected():
    """ print classifyRlib results (to stderr) if "--detected" was seen """
    if o.detected:
        for lib in sorted(list(R)):
            if lib & Match(r".*/libc[.]so[.]\d+$"): continue
            printRlib (lib, sys.stderr)
debug_detected()

# _______________________ smart helper function _____________________
# some dependencies should not provoke a mismatch even that the
# libraries themselves do mismatch in their largefile mode - that is
# the case when only algorithm functions are imported that would not
# trigger access to any filedescriptor - `zlib` is a good example.
#
# implementation: for a known set of dependent libraries, we can check
# which symbols have been imported from it. We know about those imports
# of algorithms that are acceptable. If only these were seen, then the
# import dependency turns out to be notoffending, i.e. it is "(clean)".
goodimports = {
    "libz" : [ r"deflate\w*", r"inflate\w*", r"compress\w*",
               r"uncompress\w*", r"\w+32", r"zError", r"zlibVersion"],
    # only file-reference: poptReadConfigFile(...,name)
    "libpopt" : [ r"popt[A-Z](?:\w(?!File))*" ],
    "libutil" : [ r"(open|fork)pty", r"log(in|out|wtmp|in_tty)" ],
    "libdv" : [ r"\w*" ],       # only encode/decode memory buffers
    "libpam" : [ r"\w*" ],      # only memory buffer checking
    "libnsl" : [ r"\w*" ],      # only NIS registry nonfs readwrite
    "libhistory" : [ r"\w*" ],  # a.k.a. readline
    "libreadline" : [ r"readline", "add_history" ],
    "libXpm" : [ r"XpmCreatePixmapFromData" ],
    "libssl" : [ r"SSL_\w*" ],
    "libfreetype" : [ r"\w*" ],
    "libXt" : [ r"Xt(\w(?!Input))*" ],
    "libXm" : [ r"_?Xm\w*" ],
    "libldap" : [ r"ldap_domain2hostlist", r"ldap_err2string" ],
    # fallback for unknown libraries - a pattern that matches no symbol
    ".." : [ "<<" ] }

def notoffending(bin, lib):
    """ return 1 if the -64- `bin` takes only known good symbols from the
        -32- `lib`, otherwise 0. With --nonclean the first offending symbol
        is printed, with --noncleanall all of them. """
    if not R[bin].is64 or not R[lib].is32: return 0
    library = ""
    x = Match()
    for known in goodimports.keys():
        if known == "..": continue   # the fallback entry is not a library
        # (a stray "%s" in this pattern kept every library from matching)
        if "/"+lib & x(r"/"+known+r"[.]so\b[^/]*$"):
            library = known
            break
    # if not library and not o.nonclean: return 0
    if not library: library = ".."
    offending = ""
    for sym in R[lib].symbols().keys():
        if (R[lib].sym(sym) & x(r"[*]UND[*]") or # $lib imports(!!) it.
            sym & x(r"^_\w+_*") or        # compiler symbols / hidden symbols
            sym & x(r"^\d") or            # hmmm, does exist sometimes
            sym & x(r"^[A-Z_]+[.]\w+") or # a dot in the middle, "GLIBC_2.1"
            sym & x(r"^\s*$") or          # empty, some extra info line
            not R[bin].sym(sym) ):
            continue
        # the symbol is exported(!!) by $lib and it exists in $bin....
        for known in goodimports[library]:
            if sym & x(r"^"+known+"$"): # if it's a known goodimports symbol
                sym = ""                # then clean it - it's not offending.
                break
        if sym:                         # otherwise, we have an offending symbol.
            offending += '"'+sym+'" '
            if not o.noncleanall: break
        #od
    #od
    if not offending: return 1 # imports only known good symbols.
    if library == "..": library = lib
    if o.nonclean:
        head = " ".join(["nonclean:"+bin, col36(bin), "(64-<<-32)", library])
        if o.noncleanall:
            # the (possibly long) symbol list gets a line of its own
            sys.stdout.write(head+" (not clean?)\n"+offending+"\n")
        else:
            sys.stdout.write(head+" "+offending+"\n")
    return 0 # found symbols not in the goodlist, return FALSE.
#fu

# ___________________ show largefile-mode mismatches __________________
# we walk the %X{file}s twice - we check out all the largefile mismatches
# and register them in the %offending hash. When done, then we print the
# Rlib classification of these, so that the reader can have an eyeball
# check if that is actually done right. Finally, go over the list for
# real and print the largefile mismatches - as an extension some of the
# largefile-mismatches are marked "(clean)" when the `notoffending`-helper
# functions knows that the $bin file does not import any symbol from its
# dependency $lib that could trigger some file access. So, even that there
# is a mismatch, it does not matter for there will be no non-largefile-mode
# access to the filesystem effectivly. using "--smart" or "--silent" will
# suppress these lines completely from output to the user screen.
offending = {}   # names of all objects involved in a mismatch -> "!"

def mismatch(file, lib):
    """ return 1 if the largefile mode of `file` does not go along with
        the one of `lib` that it imports from, otherwise 0. Objects that
        could not be classified (-??-) do never count as a mismatch. """
    # okay: -64-<<-64- 3264<<-64- 3264<<3264 and -32-<<-32- -32-<<3264
    # else: mismatch: 3264<<-32- -64-<<-32- and -32-<<-64-
    importer = R[file]
    provider = R[lib]
    if not importer.is32 and not importer.is64: return 0
    if not provider.is32 and not provider.is64: return 0
    if importer.is64 and provider.is64: return 0
    if importer.is32 and provider.is32 and not importer.is64: return 0
    return 1

def compute_mismatches():
    """ collect the objects to report: both ends of every largefile
        mismatch (unless --smart knows it to be harmless), and each file
        that imports plain and ..64 symbols side by side. """
    found = {}
    for file in X.keys(): # register the largefile mismatches
        for importlib in X[file].needed.keys():
            lib = X[file].needed[importlib]
            if not mismatch(file, lib): continue
            if o.smart and notoffending (file, lib): continue
            found[lib] = "!"   # register both, so that we'll see the
            found[file] = "!"  # Rlib classification of both of them.
        #od
    #od
    for file in X.keys():
        if R[file].import3264:
            found[file] = "!"
    return found

offending = compute_mismatches()

def printRlib_forall(libs, out = None):
    """ and here we print the Rlib classification - the number of
        printed classifications is returned """
    printed = 0
    for lib in libs:
        printRlib (lib, out)
        printed += 1
    return printed

# the classifications are shown unless "--quiet" or "--silent" was seen.
# a run without any mismatch however ends here with a zero exit code in
# any case - that used to happen only without --quiet, so the exit code
# of a --silent run could not tell anything.
mismatches = len(offending)
if not o.quiet:
    printRlib_forall( sorted(list(offending)) )
if not mismatches:
    if not o.silent:
        sys.stdout.write("no largefile mismatch found :-)\n\n")
    sys.exit(0) # note: the last line of this script reads "exit 1" :-)
#fi

have_weirdos = []   # basenames of the objects importing both API variants

def print_offending_import3264(out = None):
    """ here we show all the miscompiled libraries - the objects that
        import a symbol and its ..64 cousin at the same time. Their
        basenames are appended to have_weirdos, its length is returned. """
    if out is None: out = sys.stdout
    for lib in sorted(list(offending)):
        if R[lib].import3264:
            out.write(" ".join(["weirdos:", lib, col34(lib), "IMPORTS",
                                R[lib].import3264.lstrip()]) + "\n")
            have_weirdos.append(os.path.basename(lib))
    return len(have_weirdos)

if not o.quiet:
    if print_offending_import3264():
        sys.stdout.write(
            "WARNING: importing both 32bit and 64bit off_t symbols"
            " is very very dangerous!\n")
have_badlinks = 0     # number of largefile mismatches that were printed
have_cleanlinks = 0   # ...and how many of these are known to be harmless

def _print_line(*items):
    """ print the items separated by blanks, like a python2 print statement
        does - in a spelling that runs on python3 as well """
    sys.stdout.write(" ".join([str(item) for item in items]) + "\n")

def print_mismatches():
    """ now show the largefile mismatches - "badlink:" lines are known to
        be harmless "(clean)", "Badlink:" lines are not. With --smart the
        clean ones are not shown (and not counted) at all. """
    global have_badlinks, have_cleanlinks
    for file in sorted(list(X)):
        for importlib in sorted(list(X[file].needed)):
            lib = X[file].needed[importlib]
            if not mismatch(file, lib): continue
            # ask only once - with --nonclean each call prints its findings
            clean = notoffending (file, lib)
            if o.smart and clean: continue
            have_badlinks += 1
            if clean:
                have_cleanlinks += 1
                _print_line("badlink:", file, col36(file),
                            Rtyp(file)+"<<"+Rtyp(lib), lib, "(clean)")
            else:
                _print_line("Badlink:", file, col36(file),
                            Rtyp(file)+"<<"+Rtyp(lib), lib)
        #od
    #od

print_mismatches()

def print_summary():
    """ print the closing "summary:" lines - the number of weirdos (with
        --nonclean also their names, wrapped at about 70 columns) and the
        number of badlinks along with a hint on how to look closer """
    if len(have_weirdos):
        _print_line("summary: found", len(have_weirdos),
                    " weirdos - too dangerous to use them: (file bug report!)")
        if o.nonclean:
            line = ""
            for item in have_weirdos:
                # flush the collected names before the line gets too long
                if len(line+" "+item) > 70:
                    _print_line("summary: ("+line[:-1]+")")
                    line = ""
                line += item+" "
            if line:
                _print_line("summary: ("+line[:-1]+")")
    _print_line("summary: found", have_badlinks,
                "badlinks to be checked closer (", have_cleanlinks,
                "are clean)")
    if have_badlinks and not o.nonclean:
        _print_line(
            "summary: check symbols with --nonclean or even --noncleanall")

print_summary()
sys.exit(1) # there were some offending imports, or so it seems....