#!/usr/bin/perl
# findnewfeed
# Run the news spool reading the paths of articles.  Track the sites by
# number of hops and by which of our feeds the article arrived on.
# Find the sites which are, on average, farthest from us and would therefore
# do us (and them) the most good to hook up to.
# This program runs for a LONG time.
#
# Usage: findnewfeed [spool-root]
#   spool-root defaults to the current directory.
#
# Global tables built while scanning and consumed by dostats:
#   %feeds            feed name -> 1 (set of feeds seen)
#   %sites            site -> number of Path: lines it appeared in
#   %sitec{feed,site} number of times site was seen via feed
#   %sitew{feed,site} sum of hop positions of site when seen via feed
#   %sitecalc         site -> mean over feeds of (sitew/sitec)
#   %seen_inode       inode -> 1 for multiply-linked files already processed

# Our own host name; it must be the first element of every Path: line.
$ourhost = 'navair2.nalda.navy.mil';

print "Findnewfeed:\n";
$root = shift @ARGV;
chop($root = `pwd`) unless defined $root;
# rundirectory chdir()s into each directory and pushes subdirectories as
# "$dir/$f", so the root has to be absolute or the second chdir would be
# resolved relative to the first.
if ($root !~ m#^/#) {
    chop($cwd = `pwd`);
    $root = "$cwd/$root";
}
print "\troot is $root\n";
&rundirectory($root);   # recursively run the directory
&dostats;               # find the answer

# rundirectory(ROOT)
# Walk the directory tree under ROOT using an explicit stack (@dirlist)
# instead of recursion, calling doart on every non-directory, non-symlink
# entry whose name does not start with a dot.  Files with more than one
# hard link are processed only the first time their inode is seen.
sub rundirectory {
    local($root) = @_;
    local(@dirlist, $dir, $f, $nlinks, $inode);

    push(@dirlist, $root);
    while ($dir = pop(@dirlist)) {
        unless (chdir($dir)) {
            print "CHDIR failed: $!\n";
            next;
        }
        unless (opendir(DIR, ".")) {
            print "OPENDIR failed: $!\n";
            next;
        }
        # readdir in list context slurps the whole directory up front.
        foreach $f (readdir(DIR)) {
            next if $f =~ m/^\./;           # skip . .. (and other dot files)
            ($nlinks, $inode) = (lstat($f))[3,1];
            next if -l _;                   # punt on symlinks so we don't need
                                            # to track devno as well as inodes
            if (-d _) {
                push(@dirlist, "$dir/$f");
                next;
            }
            if ($nlinks > 1) {
                next if $seen_inode{$inode};    # already done this one
                $seen_inode{$inode} = 1;        # don't do it again later
            }
            &doart($f);                     # do it, relative to current directory
        }
        closedir DIR;
    }
}

# doart(FILE)
# Read the Path: header of article FILE (relative to the current directory)
# and fold it into the global %feeds, %sites, %sitec and %sitew tables.
# The first path element must be $ourhost; the second is the feed the
# article arrived on; every later element is counted as a site at hop
# distance 1, 2, 3, ... from that feed.
# Returns nothing.  Articles that cannot be opened, have no Path: header,
# do not start with $ourhost, or name no feed are skipped.
sub doart {
    local($file) = @_;
    local($_);
    local($section, $dist, $feed, @path, $site);

    # Three-argument open: the name comes straight from readdir, so it must
    # never be interpreted as a mode or a pipe.
    unless (open(ART, '<', $file)) {
        print "Can't open $file: $!\n";
        return;
    }
    $section = '';
    while (<ART>) {
        last if m/^\r?$/;                   # blank line: end of headers
        next unless m/^Path: (.*)/i;
        $section = $1;
        last;
    }
    close ART;
    return if $section eq '';               # no Path: header, nothing to count

    @path = split(/!/, $section);
    if ((shift @path) ne $ourhost) {
        print "INTERNAL ERROR: lost self: $file\n";
        return;
    }
    $feed = shift @path;
    return unless defined $feed;            # path was just us; no feed involved
    $feeds{$feed} = 1;                      # list of feeds

    $dist = 1;
    foreach $site (@path) {
        ++$sites{$site};                    # hit count for this site and list of sites
        ++$sitec{$feed,$site};              # hit count for this site from this feed
        $sitew{$feed,$site} += $dist;       # total length to this site from this feed
        ++$dist;
    }
}

# dostats()
# For every site compute, per feed, the average hop distance
# (sitew/sitec), then the mean of those averages over the feeds that
# reached the site (%sitecalc).  Print one row per site, sorted by
# ascending %sitecalc, with one column per feed.
sub dostats {
    local(@sitelist, @feedlist, @res, $x, $f, $fcount, $totav, $top, $bot);

    @sitelist = keys %sites;
    @feedlist = keys %feeds;    # one snapshot so header and rows share an order

    # pre-calculate the messy bits
    foreach $x (@sitelist) {
        $fcount = 0;            # number of feeds that got to this site
        $totav  = 0;            # total average
        foreach $f (@feedlist) {
            $bot = $sitec{$f,$x};
            if ($bot) {
                $top = $sitew{$f,$x};
                $fcount++;
                $totav += ($top / $bot);
            }
        }
        if ($fcount == 0) {
            print "INTERNAL ERROR: /0: $x\n";
            next;
        }
        $sitecalc{$x} = $totav / $fcount;
    }

    @res = sort MESSYSORT @sitelist;

    print "Results:\t";
    foreach $f (@feedlist) {
        print "$f\t";
    }
    print "\n";

    foreach $x (@res) {
        printf "%6.2f %30s ", $sitecalc{$x}, $x;
        foreach $f (@feedlist) {
            if ($sitec{$f,$x}) {
                printf "%6.2f\t", $sitew{$f,$x} / $sitec{$f,$x};
            } else {
                print "\t\t";
            }
        }
        print "\n";
    }
}

# Sort comparator: ascending by the precomputed %sitecalc value.
sub MESSYSORT {
    $sitecalc{$a} <=> $sitecalc{$b}
}