View file File name : duck Content :#!/usr/bin/perl -w # duck - the Debian Url Checker # Copyright (C) 2017 Simon Kainz <skainz@debian.org> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # he Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # On Debian GNU/Linux systems, the complete text of the GNU General # Public License can be found in `/usr/share/common-licenses/GPL-2'. # # You should have received a copy of the GNU General Public License # along with this program. If not, you can find it on the World Wide # Web at https://www.gnu.org/copyleft/gpl.html, or write to the Free # Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, # MA 02110-1301, USA. use strict; use lib '/usr/share/duck'; use lib '/usr/share/duck/lib'; use DUCK; use Getopt::Std; use Getopt::Long qw(:config pass_through ); use Data::Dumper; use File::Basename; use File::Temp qw(tempdir); use File::Path qw(remove_tree); use Cwd; use IPC::Open3; use POSIX ":sys_wait_h"; #require lib; use Parallel::ForkManager; sub HELP_MESSAGE(); sub setColor($;$); sub display_result($;$;$); sub display_blob($;$); sub missingHelpers(); sub handle_UrlFixDisableOptions($;$); sub handle_UrlFixEnableOptions($;$); sub handle_dsc($); sub cleanup(); sub get_duck_overrides(); sub is_overridden($;$); our $color_r="\x1b[31m"; our $color_g="\x1b[32m"; our $color_y="\x1b[33m"; our $color_n="\x1b[0m"; our $abort=0; my $origcwd=getcwd; #$SIG{INT} = sub { $abort=1;cleanup();}; $SIG{INT} = sub { print $color_r."Aborting, please wait!\n".$color_n;$abort=1;cleanup();}; my $checksdir='/usr/share/duck/lib/checks'; my $try_https=0; my $no_check_cert=0; my $nocolor=0; our $enforceColor="auto"; my @allowedColorVals=qw/always auto never/; my $termcolors=0; $termcolors=`/usr/bin/tput colors 2>/dev/null`; if ($? eq 0 ) { if ($termcolors < 4) { $enforceColor="never"; } } else { $enforceColor="never"; } my $showMissingHelpers; my $urlFixEnableOptions; my $urlFixDisableOptions; my $tempdir; my @global_options=("n","v","q"); $Getopt::Std::STANDARD_HELP_VERSION=1; my $exitcode=0; our %opt; our @overrides=get_duck_overrides(); my $dh; my $parallel_tasks=24; my $retval=GetOptions( "modules-dir=s" => \$checksdir, "no-https" => \$try_https, "no-check-certificate" => \$no_check_cert, "no-color" => \$nocolor, "missing-helpers" => \$showMissingHelpers, "disable-urlfix=s" => \$urlFixDisableOptions, "enable-urlfix=s" => \$urlFixEnableOptions, "color=s" => \$enforceColor, "colour=s" => \$enforceColor, "tasks=i" => \$parallel_tasks ); die("Number of parallel tasks must be >0") unless (int($parallel_tasks)>0); my $DUCK= DUCK->new(); my $funcref= $DUCK->cb(); if ($showMissingHelpers) { print "\n"; my $mh=missingHelpers(); if ($mh) {print $mh; exit(1)} else { print "All helpers installed.\n"; exit(0);} } if ($urlFixEnableOptions && $urlFixDisableOptions) { print "Conflicting options: Either use --disable-urlfix or --enable-urlfix option.\n"; exit 1; } if (!( grep( /^$enforceColor$/, @allowedColorVals ) )) { print STDERR "Invalid option \"".$enforceColor." \"for --color: Valid options are:".join(",",@allowedColorVals)."\n"; exit 1; } if ($urlFixDisableOptions) { handle_UrlFixDisableOptions($DUCK,$urlFixDisableOptions);} if ($urlFixEnableOptions) {handle_UrlFixEnableOptions($DUCK,$urlFixEnableOptions);} if ($enforceColor eq "never" ) { $color_r=""; $color_g=""; $color_y=""; $color_n=""; } import lib $checksdir; if (!opendir($dh,$checksdir)) { print STDERR "Modules directory $checksdir not found!, please use --modules-dir=<path> !\n"; if ($opt{h}) { HELP_MESSAGE(); } exit(1); } my @modules; my @module_options; my $descriptions; while (readdir $dh) { my($filename, $directories, $suffix) = fileparse($_); if (/^\./) { next}; if (/.pm$/) { require $_; my $modulename=fileparse($filename,qr/\.pm/); my $n="DUCK::".$modulename; if ($n->can("run")) { push (@modules,$modulename); } } } if (!scalar @modules) { print STDERR "No check modules found! Please check path: ".$checksdir."\n"; exit 1; } #get all options modules are providing foreach my $m (@modules) { my $n="DUCK::".$m; if ( ($n->can("opts")) && ($n->can("run")) ) { foreach ($n->opts()) { push(@module_options,$_); } } if ($n->can("desc")) { $descriptions.=$n->desc(); } } push(@module_options,@global_options); #print Dumper @module_options; GetOptions( "help" => sub {HELP_MESSAGE() } ); getopts(join("",@module_options),\%opt); if ( $opt{v} && $opt{q} ) { print STDERR " Please specify either -q or -v\n"; exit(1); } if ( $opt{l} ) { $opt{S}=1; $opt{P}=1; $opt{F}=1; $opt{U}=1; $opt{C}=1; $opt{A}=1; } if (@ARGV) { $checksdir=getcwd."/lib/checks"; handle_dsc(\$tempdir) unless $opt{l}; } my @entries; my @resultarray; my $finished_tasks=0; #run all modules, create the list of checks to run. foreach my $m (@modules) { my $n="DUCK::".$m; $n->run(\%opt,\@entries); } # inject all options to check modules $DUCK->setOptions("no-https",$try_https); $DUCK->setOptions("no-check-certificate",$no_check_cert); # iterate over all urls, run checks. my $manager= new Parallel::ForkManager($parallel_tasks); #$manager->run_on_finish( sub { # my ($pid)=@_; # print "** finished, pid: $pid\n"; #}); $manager->run_on_finish( sub { my ($pid, $exit_code, $ident, $exit_signal, $core_dump, $data_structure_reference) = @_; if (defined($data_structure_reference)) { # children are not forced to send anything my $string = ${$data_structure_reference}; # child passed a string reference # print Dumper $string; push(@resultarray,$string); $finished_tasks++; } else { # problems occurring during storage or retrieval will throw a warning print qq|No message received from child process $pid!\n|; } # my ($pid, $exit_code) = @_; # print "** just got out of the pool ". # "with PID $pid and exit code: \n"; }); #my $entry; CHECKS: foreach my $entry (@entries) { my $res; $manager->start and next CHECKS unless $opt{n}; { if ($abort) {cleanup(); exit}; my $type=@$entry[0]; my $k=@$entry[1]; my $url=@$entry[2]; my $origline=@$entry[3]; my $extra=@$entry[4]; chomp $origline unless !$origline; if ($funcref->{$k}) { if (is_overridden(\@overrides,$url)) { next; } if ($opt{n}) { print STDOUT $type.": ".$k.": ".$url.": "; print STDOUT " DRY RUN\n"; next; } $res=&{$funcref->{$k}}($url); } } $manager->finish(0,\{res=>$res,entry=>$entry}) unless $opt{n}; } #print scalar(@resultarray)."\n"; #while ($abort == 0) #{ # print Dumper $manager; # sleep(1); #} $manager->wait_all_children unless $opt{n}; #print "Sizeof reuslts:".scalar(@resultarray)."\n"; foreach my $_res (@resultarray) { my $res=$_res->{"res"}; my $entry=$_res->{'entry'}; if (!defined($entry)) {next;} if (!defined($res)) {next;} my $type=@$entry[0]; my $k=@$entry[1]; my $url=@$entry[2]; my $origline=@$entry[3]; my $extra=@$entry[4]; # my $res=@{$_res}[0]; #print "DEPUG: entry"; #print Dumper $entry; #print "DEBUG: res"; #print Dumper $res; #print "-------------------\n"; #next; if (!defined $res) { if (!$opt{q}) { setColor(\*STDERR,$color_y); my $k; print STDERR "I: Skipping field ".$k." (Reason: Missing helper!)\n"; setColor(\*STDERR,$color_n); } } else { #print Dumper $res; # my $extra; if (ref($res) eq "ARRAY") {#print "ARRAY!!!\n"; foreach my $b (@$res) { # print "mainloop:"; # print Dumper $extra; display_blob($b,$extra); } } else { # print "BLOB\n"; display_blob($res,$extra); } } } cleanup(); exit($exitcode); ############################################################################## # Helper functions sub display_blob($;$) { (my $res,my $extra)=@_; #print "Display_blob, extra:"; #print Dumper $extra; if ($res->{retval}>0) { if ($res->{'certainty'}) { $extra->{'certainty'}=$res->{'certainty'};} if ($res->{retval}==2) { if (!$opt{q}) { setColor(\*STDERR,$color_y); print STDERR display_result($res,$extra); setColor(\*STDERR,$color_n); } } else { if (!$opt{q}) { setColor(\*STDERR,$color_r); print STDERR display_result($res,$extra); setColor(\*STDERR,$color_n); } $exitcode=1; } } else { if ($opt{v}) { setColor(\*STDOUT,$color_g); print STDOUT display_result($res,$extra); setColor(\*STDOUT,$color_n); } } } sub setColor($;$) { my ($fh,$c)=@_; if ($enforceColor eq "never") { return; } if ($enforceColor eq "always") { print $fh $c; return; } if ($enforceColor eq "auto") { if (-t $fh) { print $fh $c; return; } } } sub display_result($;$;$) { my $out=""; my ($res,$data)=@_; # print Dumper $res; # print "DATA:"; # print Dumper $data; my $prefixes={ 0 => "O: ", 1 => "E: ", 2 => "I: " }; my $states={ 0 => "OK", 1 => "ERROR", 2 => "INFORMATION" }; my $P; if ($prefixes->{$res->{retval}}) { $P=$prefixes->{$res->{retval}}; } else { $P=$prefixes->{1}; # default to Error if return value >0 and out out bounds } $out.=$P; my $indent=$P; $indent =~ s/./ /g; # try to print data supplied by check if ($data->{verbose}) { $out.=$data->{verbose}; if ($states->{$res->{retval}}) { $out.=': '.$states->{$res->{retval}}; } else { $out.=': '.$states->{1}; } $out.=' (Certainty:'.$data->{certainty}.')'; $out.="\n"; } else { if ($data->{filename}) { $out.=$data->{filename}.":"; } if ($data->{linenumber}) { $out.= $data->{linenumber}.": "; } if ($data->{checkmethod}) { $out.=$data->{checkmethod}.": "; } if ($data->{url}) { $out.=$data->{url}.": "; } $res->{retval}=2 unless $res->{retval}<2; $out.=$states->{$res->{retval}}; $out.=' (Certainty:'.$data->{certainty}.')'; $out.="\n"; } if ( $res->{response} && ($res->{retval}>0) ) { my $ts=$res->{response}; $ts =~ s/\n*$//g; $ts =~ s/^/$indent/g; $ts =~ s/\n/\n$indent/g; $out.=$ts; } $out =~ s/\n*$//g; $out.="\n\n"; return $out ; } sub handle_UrlFixDisableOptions($;$) { my ($duck,$paramlist)=(@_); my @fixes=split(/,/,$paramlist); foreach ($duck->getUrlFixes()) { $duck->setUrlFixOptions($_,1); } foreach (@fixes) { $duck->setUrlFixOptions($_,0); } } sub handle_UrlFixEnableOptions($;$) { my ($duck,$paramlist)=(@_); my @fixes=split(/,/,$paramlist); foreach ($duck->getUrlFixes()) { $duck->setUrlFixOptions($_,0); } foreach (@fixes) { $duck->setUrlFixOptions($_,1); } } sub handle_dsc($) { my ($tmpdirref)=@_; my $tempdir = File::Temp->newdir(TEMPLATE=>"duckXXXXXX",TMPDIR => 1,CLEANUP => 0)->dirname; $$tmpdirref=$tempdir; my $filename=fileparse($ARGV[0]); print "Downloading to $tempdir"."\n"; chdir($tempdir); my ($html)=@_; my $pid=open3(0,\*READ,\*ERR,'dget -d '.$ARGV[0]) or die("dget error!"); my @std_output=<READ>; my @std_err=<ERR>; my $kid; do { $kid=waitpid($pid,WNOHANG); sleep(1); } while ($kid >0 && $abort == 0); if ($abort) {kill -9,$pid;cleanup(); exit} close(READ); my $prefix="dget: "; if ($opt{'v'}) { chomp @std_output; setColor(\*STDOUT,$color_y); print join("\n$prefix",@std_output); print "\n\n"; } $pid=open3(0,\*READ,\*ERR,"dpkg-source -x $filename extract") or die("dpkg-source error!"); @std_output=<READ>; @std_err=<ERR>; do { $kid=waitpid($pid,WNOHANG); my $dpkgsource_errcode=$?; print "errcode: $dpkgsource_errcode\n"; sleep(1); } while ($kid >0 && $abort==0); if ($abort) {kill -9,$pid;cleanup(); exit} close(READ); $prefix="dpkg-source: "; if ($opt{'v'}) { chomp @std_output; setColor(\*STDOUT,$color_y);; print join("\n$prefix",@std_output); print "\n"; chomp @std_err; setColor(\*STDOUT,$color_r); print join("\n$prefix",@std_err); print "\n\n"; } chdir($tempdir."/extract"); if ($opt{'v'}) { setColor(\*STDOUT,$color_n); } return $tempdir; } sub cleanup() { chdir($origcwd); #restore terminal color print $color_n; if ($tempdir && -d $tempdir ) { if ($opt{v}) { print "removing tempdir $tempdir\n"; } remove_tree($tempdir); } } sub get_duck_overrides() { my $overrides_path='debian/duck-overrides'; my @overrides; if ( -e $overrides_path) { open (my $overrides_fh,"<",$overrides_path); @overrides=<$overrides_fh>; close($overrides_fh); } chomp(@overrides); my @results; foreach (@overrides) { if ( ! /^\s*#/) { if (length($_)>1) {push (@results,$_);} } } return @results; } sub is_overridden($;$) { my ($overrides_ref,$url)=(@_); my @overrides=@{$overrides_ref}; foreach my $ov (@overrides) { if ( $url =~ m/$ov/i) { return 1; } } return 0; } sub get_domain() { my $url=$_[0]; $url =~ /:\/\/([^\/].*)/; if (!$1) {return 0}; my @dom=split(/\//,$1); return $dom[0]; } sub HELP_MESSAGE() { if (!$descriptions) { $descriptions=" No modules, found, no further options available.\n"; } print STDOUT <<EOF; Usage: duck [options] -h\t--help\t\t\tdisplay this usage information and exit -q\t\t\t\tquiet mode, suppress all output -v\t\t\t\tverbose mode -n\t\t\t\tdry run, don't run any checks, just show what would be checked --modules-dir=dir\t\tpath to check modules --no-https\t\t\tdo not try to find matching https URLs to http URLs --no-check-certificate\tdo not check SSL certificates --color=[auto,always,never]\tauto (default): color on terminal, no color when piping output \t\t\t\tor dumb terminal \t\t\t\tnever: no color output \t\t\t\talways: show colored output always --missing-helpers\t\tdisplay list of missing external helper tools --disable-urlfix=<fix1,...>\tdisable specified url fix function(s). Use --disable-urlfix=list or \t\t\t\tsee duck(1) for available options and further information. --enable-urlfix=<fix1,...>\tenable specified url fix function(s). Use --enable-urlfix=list or \t\t\t\tsee duck(1) for available options and further information. --tasks=[number]\t\tSpecify the number of checks allowed to run in parallel. Default value is 24. \t\t\t\tThis value must be an integer value >0. --version\t\t\tdisplay copyright and version information Available module options: EOF print $descriptions; print "\n"; exit(0); } sub VERSION_MESSAGE() { my $DUCK=DUCK->new(); print "duck ".$DUCK->version()."\n"; my $copyright_year=$DUCK->copyright_year(); print <<EOF; This code is copyright $copyright_year by Simon Kainz <skainz\@debian.org> all rights reserved. This program comes with ABSOLUTELY NO WARRANTY. You are free to redistribute this code under the terms of the GNU General Public License, version 2 or later. EOF } sub missingHelpers() { my $DUCK=DUCK->new(); my $out; my $h=$DUCK->getHelpers(); if (!$h->{git}) { $out.= "git missing. Please install package git.\n"; } if (!$h->{bzr}) { $out.= "bzr missing. Please install package bzr.\n"; } if (!$h->{svn}) { $out.= "svn missing. Please install package subversion.\n"; } if (!$h->{hg}) { $out.= "hg missing. Please install package mercurial.\n"; } return $out; }