По поводу польских файлов предлагаю следующее. Алгоритм никакой, но на польских файлах срабатывает хорошо за счет non-ascii. Было бы интересно сделать обучающую машину, но на это может уйти много времени. --- /usr/lib/rpm/find-provides~ 2003-09-27 22:27:57 +0400 +++ /usr/lib/rpm/find-provides 2003-10-06 19:35:16 +0400 @@ -146,7 +146,7 @@ # Ignore symlinks for non-PAM scripts. [ ! -L "$f" ] || return 0 - if [ -z "${t##perl script text*}" -o -z "${t##Perl5 module source text}" ] || [ -z "${f%%*.p[lmh]}" -a -z "${t##*text*}" ]; then + if [ -z "${t##perl script text*}" -o -z "${f%%*.p[lmh]}" ]; then if [ -n "$FIND_PERL" ]; then [ -z "$LIST_PERL" ] && LIST_PERL="$f" || LIST_PERL="$LIST_PERL $f" --- /usr/lib/rpm/perl.req 2003-09-30 14:42:01 +0000 +++ RPM/SOURCES/rpm-perl.req 2003-10-06 15:28:05 +0000 @@ -276,9 +276,18 @@ sub process_file { process_line($_); } unless (close PIPE) { - $method ne 'relaxed' - and die "$fname: deparse failed.\n" - or warn "$fname: deparse failed.\n"; + if ($method eq 'relaxed') { + warn "$fname\: deparse failed, but I don't care\n"; + } elsif ($method eq 'strict') { + die "$fname\: deparse failed.\n"; + } else { + my $v = isPerl($fname); + if ($v > 0) { + die "$fname\: deparse failed. isPerl=$v\n"; + } else { + warn "$fname\: deparse failed, isPerl=$v, ok\n"; + } + } } } @@ -390,3 +399,56 @@ foreach my $k (keys %req) { } # nothing special? print "perl-base\n" unless %req; + +# auxiliary stuff +sub count($$) { + debug "$_[0] $_[1]"; +} + +sub isPerl { + my $fname = shift; + chomp $fname; + open(FILE, $fname) || die; + local $_ = join "" => ; + close FILE; + debug "processing $fname"; + my ($n, @n); + +# POSITIVE +# variables + @n = /[$%@]\w+/g; + count @n, "variables"; + $n += @n; +# comments + @n = /^\s*#/gm; + count @n, "comments"; + $n += @n; +# blocks + @n = /{$|^\s*}/gm; + count @n, "blocks"; + $n += @n; +# keywords + @n = /\b(unless|foreach|package|sub|use|strict)\b/gm; + count @n, "keywords"; + $n += @n; +# pod + @n = /^=(?:back|begin|cut|end|for|head|item|over|pod)/gm; + count @n, "pod sections"; + $n += @n; +# modules + @n = /^1;$/gm; + count @n, "`1;'"; + $n += @n; + +# NEGATIVE +# non-ascii characters + @n = /[^[:ascii:]]/g; + count @n, "non-ascii characters"; + $n -= @n; +# prolog + @n = /:-/g; + count @n, "prolog :- operators"; + $n -= @n; +# density + $n /= -s $fname; +}