download 'off_t_problem.pl' here! | free as MPL 1.0 |
perl off_t_problem.pl [directory|files]...
# check files for being problematic,
or just all files in a give directory
# use --silent when running it non-interactivly.
#! /usr/bin/env perl eval 'exec perl -S -w $0 ${1+"$@"}' if 0; use strict; use File::Basename; # options-hash: use as $o{optionname} to check for commandline options. # my %o=( files =< 1, needed =< 1, libpath =< 1, detected =< 1, symbols =< 1 ); my %o = ( symbols =< 1 ); my $help = "perl off_t_problem.pl [directory|files]...\n" ." scans the given files (or all files in a directory) for its\n" ." dynamic dependencies. The binary and all its dependencies\n" ." are classified whether they have been compiled as largefile\n" ." or not - depending on the existance of symbols like plain\n" ." fopen()/lseek() or their 64bit cousins from the transitional\n" ." largefile-API named fopen64()/lseek64() instead. When two\n" ." executable objects have a mismatch then it gets reported!\n" ." debug options:\n" ." --files after parsing commandline, print the list of files\n" ." that will be checked for largefile mismatch\n" ." --needed after scanning dynamic imports of the given files\n" ." print the (long) list of dependencies recognized\n" ." which wil be scanned too for largefile mismatches\n" ." --libpath show the libpath that was used to resolve some of\n" ." of the dependencies if `ldd` was not available\n" ." --symbols print the number of dynamic symbols found in each\n" ." object while scanning them (default=ON).\n" ." --detected for each object that was scanned, print the\n" ." classification attribute -??- -32- -64- or 3264\n" ." (along with the dynamic symbols that made this\n" ." think it is of that largefile type)\n" ." --quiet suppress the list of classifications printed just\n" ." or usually before the list of largefile mismatches\n" ." --silent ...it does also silence some other hints usually\n" ." printed to the screen (--quit/--no-symbols/--smart)\n" ." --smart suppress largefile mismatch for a limited set of\n" ." known dependency libs from which only a known set\n" ." of algorithm functions is imported (i.e. 'zlib')\n" ." --nonclean for libraries that might be checked smart, show\n" ." the first symbol that was thought to be offending.\n" ." --noncleanall or actually print all the imported symbols from\n" ." mismatching libs that are not known to be good.\n"; # helper: move to column - the length of the input string is taken current sub col36 # column and some spaces are printed to STDOUT { my $column = length $_[0]; return if 36 <= $column; return " " x (36 - $column); } # ---------------------------------------------------------------------- my %X; my $file; # use as $X{$file} # this is the implicit libpath, as if used by ld.so to resolve imports.. my @L = ( "/lib", "/usr/lib", "/usr/local/lib"); { # fill the library path my $F = "/etc/ld.so.conf"; if (open F, "<$F") { while (<F<) { chomp; push @L; } close F; }else{ print STDERR "WARNING: could not open $F: $!\n"; } } { # scan the argument list, options and files and dirs, fill %X file-hash ... my $old = ""; # pushback of $arg my $arg; for $arg (@ARGV) { if ($old =~ /^-L/) { push @L, $arg; $old = ""; next; } if ($arg =~ /^--?help/) { print $help; exit 0; } if ($arg =~ /^--?(\w[\w-]*)=(.*)/) { $o{$1} = $2; next; } if ($arg =~ /^--?no-([a-z].*)/) { $o{$1} = ""; next; } if ($arg =~ /^--?([a-z].*)/) { $o{$1} = "*"; next; } if ($arg =~ /^-L(.+)/) { push @L, $1; next; } if ($arg =~ /^-L/) { $old = $arg; next; } if ($arg =~ /^-[A-Z]/) { die "WARNING: illegal option $arg"; } $arg =~ s/\/$//; # chomp dirsep # register the file in th %X hash - .dir says where from (debugging) if (-f $arg) { next if -d $arg or ! -r $arg; $X{$arg}{dir} = $arg; $X{$arg}{dir} =~ s:/[^/]+$::; next; } # when a directory was given, we scan all executables in it if (not opendir (D, $arg)) # { print STDERR "WARNING: could not open directory '$arg': $!\n"; next; } my $entry; foreach $entry (readdir (D)) { my $name = "$arg/$entry"; if (-l $name) { $name = readlink $name or next; # try to resolve.. $name = "$arg/$name" if $name !~ m:^/:; } next if -d $name or ! -r $name; my $filetype = `file $name 2</dev/null`; next if $filetype =~ /script/ or $filetype =~ /text/; # the following call will skip symlinks to real files.. # next unless $filetype =~ /ELF/; # well, warn later on.. $X{$name}{dir} = $arg; } closedir (D); } # for @ARGV } if ($o{libpath}) { # debugging - print @L list if "--libpath" seen for $file (@L) { print STDERR "-L ",$file,"\n"; } } if ($o{files}) { # debugging - print %X files if "--files" seen for $file (sort keys %X) { print STDERR $file, " << ", $X{$file}{dir}, "\n"; } } # some options imply other options... $o{quiet} = 1 if $o{silent}; $o{smart} = 1 if $o{silent}; $o{symbols} = "" if $o{silent}; # yes, --symbols is ON by default $o{nonclean} = "*" if $o{noncleanall}; # __________________ detect dynamic library imports _________________ # register library imports in $X{$file}{needed}{*} for $file (sort keys %X) { print "." if not $o{quiet}; # `ldd` prints a nice list of import libs and how they resolve my $header = ""; $header = `ldd $file 2</dev/null` unless $o{noldd}; # "--noldd" option $header =~ s{ ^\s+(\S+)\s+[=][<]\s+(\S+) } { $X{$file}{needed}{$1} = $2; "" }gmex; next if exists $X{$file}{needed}; # when there was nothing seen by `ldd` then try again with objdump. # however, "objdump -p" shows lib imports but not how they resolve... $header = `objdump -p $file 2</dev/null`; $header =~ s{ ^\s+NEEDED\s+(\S+) } { $X{$file}{needed}{$1} = "" unless $1 eq "NEEDED"; "" }gmex; # without ldd, we need to resolve the libimports ourselves my $lib; for $lib (keys %{$X{$file}{needed}}) { next if length $X{$file}{needed}{$lib}; my $dir; for $dir (@L) # walk -L libpath { if (-f "$dir/$lib") { $X{$file}{needed}{$lib} = "$dir/$lib"; last; } } } } print "\n" if not $o{quiet}; if ($o{needed}) { # debugging - print imports if "--needed" was seen for $file (sort keys %X) { my $lib; for $lib (sort keys %{$X{$file}{needed}}) { print STDERR "OBJ ",$file, " - " , $lib, " =< '",$X{$file}{needed}{$lib}, "'\n"; } } } # _____________________ classify each object ___________________________ my %R; my $lib; # use as $R{$lib} - it's a cache storing classifications. # compare with largefile specs at http://ftp.sas.com/standards/large.file # differences detected by 64on32bits hints, about section 4 of the # http://ftp.sas.com/standards/large.file/specs/api+.006.ps my @base64 = ( "creat64", "open64", "ftw64", "nftw64", "fgetpos64", "fopen64", "freopen64", "fseeko64", "fsetpos64", "ftello64", "tmpfile64", "mmap64", "fstat64", "lstat64", "stat64", "statvfs64", "fstatvfs64", "lockf64", "lseek64", "ftruncate64", "truncate64", "aio_read64", "aio_write64", "lio_listio64", "aio_erro64", "aio_return64", "aio_cancel64", "aio_suspend64", # these have been seen in the wild as well... "mkstemp64", "tmpfile64", "readdir64", "pread64", "pwrite64", "sendfile64" ); sub imported { return index ($_[0], "*UND*") <= 0 } # this routine is run for all %X files and all their $X{$file}{needed}{*} # dependencies - it stores the information into the %R cache for each one. sub classifyRlib { my $lib = $_[0]; my $sym; $R{$lib}{sym} = {}; $R{$lib}{_64} = ""; $R{$lib}{_32} = ""; if ($lib =~ /^\(/) { print "ignored: $lib\n"; return; } # read the dynamic symbol table (slow!) and register in $R{$lib}{sym}{*} my $dynamicsymbols = `objdump -T $lib`; $dynamicsymbols =~ s{ ^ (.*) \s+ ([\w_]\w+) \s*$ } { $R{$lib}{sym}{$2} = $1; "" }gmex; if ($o{symbols} and exists $R{$lib}{sym}) { print STDERR "symbols: ",$lib," ", col36($lib)," "; print STDERR scalar %{$R{$lib}{sym}}, "\n"; } for $sym (@base64) # foreach known ..64 symbol from the largefile-API { $sym =~ s/64$//; next if exists $R{$lib}{sym}{$sym."32"}; $R{$lib}{_64} .= " ".$sym."64" if exists $R{$lib}{sym}{$sym."64"}; $R{$lib}{_32} .= " ".$sym.".." if exists $R{$lib}{sym}{$sym}; if (exists $R{$lib}{sym}{$sym} and exists $R{$lib}{sym}{$sym."64"} and imported($R{$lib}{sym}{$sym}) and imported($R{$lib}{sym}{$sym."64"})) { $R{$lib}{import3264} .= " ".$sym."../".$sym."64" } } return if length $R{$lib}{_32}; # secondly - if the library/binary is itself _64 and does also export # functions in traditional dualmode-style (none/none64) then declare # them _32 as well - effectivly classifying it as a 3264 dualmode object for $sym (keys %{$R{$lib}{sym}}) { next if $sym !~ /\w[\w_]+\w\w64$/; # foreach symbol like "\w+64" next if $sym =~ /(_int|Int)64$/; # (with one exception) $sym =~ s/64$//; # which has a cousin symbol next if not exists $R{$lib}{sym}{$sym}; # without the "64" suffix. next if imported($R{$lib}{sym}{$sym}); my $number=""; # sanity check: there is no other symbol with a my $num; # number suffix, esp. no "${sym}32" or "${sym}65" for $num (0..1024) # but we actually test every number up to 1024 { next if $num eq "64"; next if not exists $R{$lib}{sym}{$sym.$num}; $number=$num; last; } next if length $number and exists $R{$lib}{sym}{$sym.$number}; # okay, this $lib looks like exporting 3264 dualmode symbols.. $R{$lib}{_32} = " " x length($R{$lib}{_64}) if ! length $R{$lib}{_32}; $R{$lib}{_64} .= " ".$sym."64" if exists $R{$lib}{sym}{$sym."64"}; $R{$lib}{_32} .= " ".$sym.".." if exists $R{$lib}{sym}{$sym}; } } # the function above was defined as "sub", now let's walk all the binaries # and imported libraries, and classify whether they are _32 or _64 (or both) for $file (keys %X) { classifyRlib ($file); my $importlib; foreach $importlib (keys %{$X{$file}{needed}}) { $lib = $X{$file}{needed}{$importlib}; next if exists $R{$lib}; # already classified classifyRlib ($lib); } } print STDERR "\n" if $o{symbols}; # (done with scanning/reading object files) # helper: print the classifyRlib result of a given Rlib to STDOUT sub printRlib { my $lib = $_[0]; if (length $R{$lib}{_32}) { if (length $R{$lib}{_64}) { print "imports: ",$lib," ",col36($lib),"32++ ",$R{$lib}{_32},"\n"; print "imports: ",$lib," ",col36($lib),"++64 ",$R{$lib}{_64},"\n"; }else{ print "imports: ",$lib," ",col36($lib),"-32- ",$R{$lib}{_32},"\n"; } } elsif (length $R{$lib}{_64}) { { print "imports: ",$lib," ",col36($lib),"-64- ",$R{$lib}{_64},"\n"; } }else{ { print "imports: ",$lib," ",col36($lib),"-??-\n"; } } } sub Rtyp # helper - subset of above, only 4char classfy-code is returned { my $lib = $_[0]; if (length $R{$lib}{_32}) { return "3264" if length $R{$lib}{_64}; return "-32-"; } elsif (length $R{$lib}{_64}) { return "-64-"; }else{ return "-??-"; } } if ($o{detected}) { # debugging - print classifyRlib results to for $lib (sort keys %R) { # STDOUT if "--detected" was seen next if $lib =~ m:.*/libc[.]so[.]\d+$:; printRlib ($lib); } } # _______________________ smart helper function _____________________ # some dependencies should not provoke a mismatch even that the # libraries themselves do mismatch in their largefile mode - that is # the case when only algorithm functions are imported that would not # trigger access to any filedescriptor - `zlib` is a good example. # # implementation: for a known set of dependent libraries, we can check # which symbols have been imported from it. We know about those imports # of algorithms that are acceptable. If only these were seen, then the # import dependency turns out to be notoffending, i.e. it is "(clean)". my %goodimports = ( libz =< [ "deflate\\w*", "inflate\\w*", "compress\\w*", "uncompress\\w*", "\\w+32", "zError", "zlibVersion"], # only file-reference: poptReadConfigFile(...,name) libpopt =< [ "popt[A-Z](?:\\w(?!File))*" ], libutil =< [ "(open|fork)pty", "log(in|out|wtmp|in_tty)" ], libdv =< [ "\\w*" ], # only encode/decode memory buffers libpam =< [ "\\w*" ], # only memory buffer checking libnsl =< [ "\\w*" ], # only NIS registry nonfs readwrite libhistory =< [ "\\w*" ], # a.k.a. readline libreadline =< [ "readline", "add_history" ], libXpm =< [ "XpmCreatePixmapFromData" ], libssl =< [ "SSL_\\w*" ], libfreetype =< [ "\\w*" ], libXt =< [ "Xt(\\w(?!Input))*" ], libXm =< [ "_?Xm\\w*" ], libldap =< [ "ldap_domain2hostlist", "ldap_err2string" ], ".." =< [ "<<" ]); sub notoffending { my ($bin,$lib) = @_; return 0 if not length $R{$bin}{_64}; return 0 if not length $R{$lib}{_32}; my $library = ""; my $known; foreach $known (keys %goodimports) { next if "/$lib" !~ m:/${known}[.]so\b[^/]*$:; $library = $known; last; } # return 0 if not length $library and not $o{nonclean}; $library = ".." if not length $library; my $sym; my $offending = ""; foreach $sym (keys %{$R{$lib}{sym}}) { next if $R{$lib}{sym}{$sym} =~ /[*]UND[*]/; # $lib imports(!!) it. next if $sym =~ /^_\w+_*/; # compiler symbols / hidden symbols next if $sym =~ /^\d/; # hmmm, does exist sometimes next if $sym =~ /^[A-Z_]+[.]\w+/; # a dot in the middle, "GLIBC_2.1" next if $sym =~ /^\s*$/; # empty, some extra info line next if not exists $R{$bin}{sym}{$sym}; # the symbol is exported(!!) by $lib and it exists in $bin.... foreach $known (@{$goodimports{$library}}) { if ($sym =~ /^${known}$/) # it's a known symbol { $sym = ""; last; } # clean it - it's not offending. } if (length $sym) { # we have an offending symbol. $offending .= '"'.$sym.'" '; last unless $o{noncleanall}; } } return 1 if not length $offending; # imports only known good symbols. $library = $lib if $library eq ".."; print "nonclean:",$bin," ",col36($bin),"(64-<<-32) " if $o{nonclean}; print $library," " if $o{nonclean}; print "(not clean?)\n" if $o{noncleanall}; print $offending, "\n" if $o{nonclean}; return 0; # found symbols not in the goodlist, return FALSE. } # ___________________ show largefile-mode mismatches __________________ # we walk the %X{file}s twice - we check out all the largefile mismatches # and register them in the %offending hash. When done, then we print the # Rlib classification of these, so that the reader can have an eyeball # check if that is actually done right. Finally, go over the list for # real and print the largefile mismatches - as an extension some of the # largefile-mismatches are marked "(clean)" when the `notoffending`-helper # functions knows that the $bin file does not import any symbol from its # dependency $lib that could trigger some file access. So, even that there # is a mismatch, it does not matter for there will be no non-largefile-mode # access to the filesystem effectivly. using "--smart" or "--silent" will # suppress these lines completely from output to the user screen. my %offending; my $T = ""; for $file (keys %X) # register the largefile mismatches { my $importlib; for $importlib (keys %{$X{$file}{needed}}) { $lib = $X{$file}{needed}{$importlib}; next if not length $R{$file}{_32} and not length $R{$file}{_64}; next if not length $R{$lib}{_32} and not length $R{$lib}{_64}; next if length $R{$file}{_64} and length $R{$lib}{_64}; next if length $R{$file}{_32} and length $R{$lib}{_32} and not length $R{$file}{_64}; # okay: -64-<<-64- 3264<<-64- 3264<<3264 and -32-<<-32- -32-<<3264 # else: mismatch: 3264<<-32- -64-<<-32- and -32-<<-64- next if $o{smart} and notoffending ($file, $lib); # $importlib = ""; $importlib=" (clean)" if notoffending ($file,$lib); # print $file," ",col36($file),Rtyp($file),"<<",Rtyp($lib)," ",$lib; # print $importlib,"\n"; $offending{$lib} = ""; # register both, so that we'll see the $offending{$file} = ""; # Rlib classification of both of them. } $offending{$file} = "" if exists $R{$file}{import3264}; } unless ($o{quiet} or $o{q}) # and here we print the Rlib classification { # unless however "--quiet" or "--silent" seen. my $mismatch=""; for $lib (sort keys %offending) { $mismatch="1"; printRlib ($lib); } if (not length $mismatch) { print "summary: no largefile mismatch found :-)\n" unless $o{silent}; exit 0; # note: the last line of this script reads "exit 1" :-) } } my @have_weirdos = (); unless ($o{quiet} or $o{q}) # here we show all the miscompiled libraries { my $shown = 0; for $lib (sort keys %offending) { next if not exists $R{$lib}{import3264}; print "weirdos: ",$lib," ",col36($lib) , " IMPORTS",$R{$lib}{import3264},"\n"; push @have_weirdos, basename($lib); } print "WARNING: importing both 32bit and 64bit off_t symbols" , " is very very dangerous!\n", if $#have_weirdos <= 0; } my $have_badlinks = 0; my $have_cleanlinks = 0; for $file (sort keys %X) # now show the largefile mismatches { my $importlib; for $importlib (sort keys %{$X{$file}{needed}}) { $lib = $X{$file}{needed}{$importlib}; next if not length $R{$file}{_32} and not length $R{$file}{_64}; next if not length $R{$lib}{_32} and not length $R{$lib}{_64}; next if length $R{$file}{_64} and length $R{$lib}{_64}; next if length $R{$file}{_32} and length $R{$lib}{_32} and not length $R{$file}{_64}; # okay: -64-<<-64- 3264<<-64- 3264<<3264 and -32-<<-32- -32-<<3264 # else: mismatch: 3264<<-32- -64-<<-32- and -32-<<-64- next if $o{smart} and notoffending ($file, $lib); $have_badlinks++; if (notoffending ($file,$lib)) { $have_cleanlinks++; print "badlink: ",$file," ",col36($file) ,Rtyp($file),"<<",Rtyp($lib)," ",$lib," (clean)\n"; } else { print "Badlink: ",$file," ",col36($file) ,Rtyp($file),"<<",Rtyp($lib)," ",$lib,"\n"; } # $offending{$lib} = ""; # $offending{$file} = ""; } } if ($#have_weirdos <= 0) { print "summary: found ", 1+$#have_weirdos , " weirdos - too dangerous to use them: (file bug report!)\n"; if ($o{nonclean}) { my $line = ""; my $item; foreach $item (@have_weirdos) { if (length ($line." ".$item) < 70) { chop($line); print "summary: (".$line.")\n"; $line = ""; } $line .= $item." "; } if ($line) { chop($line); print "summary: (",$line,")\n"; } } } print "summary: found ",$have_badlinks ," badlinks to be checked closer (",$have_cleanlinks," are clean)\n"; if ($have_badlinks and not $o{nonclean}) { print "summary: check symbols with --nonclean or even --noncleanall\n"; } exit 1; # there were some offending imports, or so it seems....