#!/usr/bin/perl

# Tuning knobs (package globals read by the subs below).

# A header match shorter than this many bytes is not trusted on its own:
# the file's content is scanned for known phrases as well.
$goodenough = 4;

# Number of bytes read from the start of a file for that content scan.
$firstblock = 2048;

# sortof(CODE) - translate a one-letter category code into its name.
#   CODE    one of A B C D E F G H I M P S T (case-sensitive)
# Returns the category name, or "Unknown" for any other value (including
# undef and the empty string).
# Side effect: the global $sortof is set to the returned name, exactly as
# the original if/elsif chain did.
sub sortof
	{
	# Copy the argument: @_ aliases the caller's variable, and callers in
	# this file pass the global $sortof itself.
	my ($code) = @_;
	my %name_of = (
		"A" => "Audio",
		"B" => "Binary",
		"C" => "Compressed",
		"D" => "Document",
		"E" => "Encryption",
		"F" => "FileArchive",
		"G" => "Graphics",
		"H" => "Hypertext",
		"I" => "DiskImage",
		"M" => "Email",
		"P" => "Password",
		"S" => "SourceCode",
		"T" => "Text",
	);
	if (defined($code) && exists($name_of{$code}))
		{$sortof = $name_of{$code};}
	else
		{$sortof = "Unknown";}
	# Explicit return instead of relying on the value of the last
	# statement executed inside a conditional.
	return $sortof;
	}

# Load the magic-number table from headers.dat in the current directory.
# Each line carries four whitespace-separated fields:
#   extension  hex-signature  category-letter  comment (rest of the line)
# Results go into %type, %comment and %sortof, all keyed by the hex
# signature.  When several lines share a signature their values are
# accumulated, separated by single spaces.
# NOTE(review): %comment and %sortof are also filled by the content.dat
# loader, keyed by phrase; a phrase identical to a hex signature would be
# merged with it - confirm this cannot happen in the data files.
if (open(F, "<", "headers.dat"))
	{
	while (defined(my $line = <F>))
		{
		# chomp, not chop: chop would eat a real character when the last
		# line of the file has no trailing newline.
		chomp($line);
		my ($ext, $hex, $kind, $comment) = split(" ", $line, 4);
		# Blank or truncated lines have no signature to index by.
		next if !defined($hex) || $hex eq "";
		$comment = "" unless defined $comment;
		my $category = sortof($kind);
		if (!defined($type{$hex}) || $type{$hex} eq "")
			{
			$type{$hex} = $ext;
			$comment{$hex} = $comment;
			$sortof{$hex} = $category;
			}
		else
			{
			$type{$hex} = $type{$hex} . " " . $ext;
			$comment{$hex} = $comment{$hex} . " " . $comment;
			$sortof{$hex} = $sortof{$hex} . " " . $category;
			}
		}
	close(F);
	}
else
	{
	# Keep running (the phrase table may still be usable) but say why no
	# header will ever match.
	warn "cannot read headers.dat: $!\n";
	}

# Load the phrase table from content.dat in the current directory.
# Each line carries four colon-separated fields:
#   extension:phrase:category-letter:comment (rest of the line)
# Every phrase is pushed onto the FRONT of @phraselist, so entries from
# later lines are tried first.  %phrase, %comment and %sortof are keyed by
# the phrase; values for a repeated phrase are accumulated, separated by
# single spaces.
if (open(F, "<", "content.dat"))
	{
	while (defined(my $line = <F>))
		{
		# chomp, not chop: chop would eat a real character when the last
		# line of the file has no trailing newline.
		chomp($line);
		my ($ext, $phrase, $kind, $comment) = split(/:/, $line, 4);
		# Skip lines without a phrase.  An empty phrase would later be
		# used as an empty regex, which Perl treats as "reuse the last
		# successful pattern" rather than as a pattern of its own.
		next if !defined($phrase) || $phrase eq "";
		$comment = "" unless defined $comment;
		my $category = sortof($kind);
		unshift(@phraselist, $phrase);
		if (!defined($phrase{$phrase}) || $phrase{$phrase} eq "")
			{
			$phrase{$phrase} = $ext;
			$comment{$phrase} = $comment;
			$sortof{$phrase} = $category;
			}
		else
			{
			$phrase{$phrase} = $phrase{$phrase} . " " . $ext;
			$comment{$phrase} = $comment{$phrase} . " " . $comment;
			$sortof{$phrase} = $sortof{$phrase} . " " . $category;
			}
		}
	close(F);
	}
else
	{
	# Keep running (header matching may still work) but say why no
	# phrase will ever match.
	warn "cannot read content.dat: $!\n";
	}

# tryharder - second-pass identification by file content.
# Reads up to $firstblock bytes from the start of the file named by the
# global $file into the global $block (desperation reuses that buffer),
# then tests each entry of @phraselist against it as a case-insensitive
# regular expression.  On the first hit it sets $r, $c and $so from
# %phrase, %comment and %sortof, sets $howgood to 5 and stops looking.
# When nothing matches, those result globals are left untouched.
sub tryharder {
	# Start from an empty buffer so a failed open or read can never leave
	# the previous file's data behind to be matched.
	$block="";
	# Three-argument open: the name arrives from the command line or
	# STDIN, and with the two-argument form a name such as ">x" or "x|"
	# would be taken as a mode or a command instead of a file to read.
	if (open(F, "<", $file))
		{
		binmode(F);	# raw bytes; no newline or ^Z translation
		$maxsize=read(F,$block,$firstblock);
		close(F);
		}
	$block="" unless defined $block;
	foreach my $pattern (@phraselist)
		{
		# An empty pattern would silently reuse the last successful regex.
		next if !defined($pattern) || $pattern eq "";
		if ($block =~ /$pattern/i)
			{
			$r=$phrase{$pattern};
			$c=$comment{$pattern};
			$so=$sortof{$pattern};
			$howgood=5;
			# "last" actually leaves the loop.  The original bareword
			# "break" is not loop control in Perl, so every phrase was
			# tried and the final match in the list overwrote earlier ones.
			last;
			}
		}
	}

# desperation - last-resort classification from the character mix of the
# global $block (filled earlier by tryharder; it is not re-read here).
# Families of characters are stripped from a private copy one stage at a
# time; as soon as nothing is left, the file is reported as category
# "Text" with that stage's comment, $howgood 2 and $r "*".  If bytes
# remain after every stage the file is reported as "Unknown" with
# $howgood 1 and $r "*".
# Results are delivered through the globals $so, $c, $howgood and $r.
sub desperation {
	# Private scratch copy.  The original used $a, which is one of the
	# package variables reserved for sort comparison blocks.
	my $left = $block;
	# [characters to strip, comment to report if nothing then remains]
	my @stages = (
		[qr/[a-zA-Z \t\n\r\~\!\$\%\&\*\+\(\)\-\[\]\.\,\?\'\"\;\:0-9\`\/\#]/,
			"Appears to be text only (per 1st $firstblock bytes)"],
		[qr/\x1a/,
			"Appears to be text only (per 1st $firstblock bytes) with DOS EOF marker (^Z)"],
		[qr/[\f\x08]/,
			"Appears to be text and printer controls only ($firstblock bytes)"],
		# Message fixed: the original ended with an unbalanced "))".
		[qr/[\\\=\/\@\|\#\<\>\{\}\_\^\x07]/,
			"Appears to be text, printer controls, and some programming characters ($firstblock bytes)"],
		[qr/[\xa0-\xff\x80-\x8f\x9b\x1b\xfa]/,
			"Appears to be a DOS screen shot - text plus formatting characters. Hard to tell for sure."],
	);
	foreach my $stage (@stages)
		{
		my ($strip, $verdict) = @$stage;
		$left =~ s/$strip//g;
		if ($left eq "") {$so="Text";$c=$verdict;$howgood=2;$r="*";return;}
		}
	$so="Unknown";$c="Not a recognized file format - a memory image?";$howgood=1;$r="*";
	}

# checkfile - identify the file named by the global $file and print one
# report line on standard output:
#   <name> <TAB>(<howgood>)<category> '<extensions>': <comment>
# The first 12 bytes are compared against the signature table %type,
# keeping the longest signature that matches; $howgood is the number of
# bytes in that signature.  A match shorter than $goodenough bytes is
# followed up with tryharder(), and anything still below 2 with
# desperation().  Results are left in the globals $r (extensions),
# $c (comment), $so (category), $s (matched hex key) and $howgood.
# A file that cannot be opened is reported as category "Unknown" with
# $howgood 0 and the system error text as the comment.
sub checkfile {
	$r="";
	$s=$c=$so="Unlisted";
	print "$file ";
	my $maxlen=12;		# maximum number of bytes to look at
	my $bytes="";
	# Three-argument open: the name arrives from the command line or
	# STDIN, and with the two-argument form a name such as ">x" or "x|"
	# would be taken as a mode or a command instead of a file to read.
	if (!open(F, "<", $file))
		{
		$howgood=0;$so="Unknown";$r="*";$c="Cannot open file: $!";
		print "\t($howgood)$so '$r': $c\n";
		return;
		}
	binmode(F);	# raw bytes; no newline or ^Z translation
	$maxsize=read(F,$bytes,$maxlen);
	close(F);
	my $got = defined($maxsize) ? $maxsize : 0;
	$bytes="" unless defined $bytes;
	my $hex_hi=unpack("H*",$bytes);	# each byte as two hex digits, high nybble first
	my $hex_lo=unpack("h*",$bytes);	# each byte as two hex digits, low nybble first
	# NOTE(review): the original comment describes the second form as the
	# "small endian" check, but "h*" swaps the two nybbles of each byte
	# rather than reversing byte order - confirm against headers.dat.
	$howgood=0;
	# Try prefixes from shortest to longest so the longest match is the
	# one left standing.  A zero-length prefix is not probed: it could
	# only yield $howgood 0, which tryharder/desperation always replace.
	foreach my $len (1 .. $got)
		{
		foreach my $hex ($hex_hi, $hex_lo)
			{
			my $key=substr($hex,0,$len*2);
			next if !defined($type{$key}) || $type{$key} eq "";
			$r=$type{$key};$c=$comment{$key};$s=$key;$so=$sortof{$key};
			$howgood=$len;
			}
		}
	if ($howgood < $goodenough) {tryharder();}
	if ($howgood < 2) {desperation();}
	if ($r ne "") {print "\t($howgood)$so '$r': $c";}
	print "\n";
}

# Driver: check every regular file named on the command line, then every
# regular file named one per line on standard input.  Names that do not
# refer to a regular file are skipped silently.
# $file is deliberately the package global: checkfile() reads it.
foreach $file (@ARGV) {if (-f $file) {checkfile();}}
while (defined(my $line = <STDIN>))
	{
	# chomp, not chop: chop would cut the last character off the final
	# name when standard input does not end with a newline.
	chomp($line);
	$file=$line;
	if (-f $file) {checkfile();}
	}
