#!/usr/bin/perl -w
#
# Lexicon lookup filter.
#
# Reads a lexicon file into memory, then reads tokenised text (one token per
# line) from the files named on the command line, or from STDIN, and prints
# each token as a quoted "<word>" line followed by whatever readings the
# lexicon holds for it.  The output shape resembles the cohort format used by
# Constraint Grammar tools (NOTE(review): inferred from the output only --
# confirm against the downstream consumer).
#
# Input lines that are empty or start with '<' are passed through unchanged.

use strict;
use warnings;
use utf8;

# NOTE(review): no I/O encoding layers are set, so \W and lcfirst below work
# on raw bytes exactly as before.  Add them only once the encoding of the
# lexicon and of the input text has been confirmed.

# Replace with your own path.
my $lexicon_path = '/home/eckhard/parsers/eng/lex/baselex.eng';

# word form => readings, stored as a ready-to-print block in which every
# reading sits on its own tab-indented line.
my %lex;

# --- Load the lexicon -------------------------------------------------------

# Three-argument open with a lexical handle; fail loudly, because running
# without the lexicon would silently emit tokens with no readings at all.
open(my $lex_fh, '<', $lexicon_path)
    or die "Cannot open lexicon '$lexicon_path': $!\n";

while (my $line = <$lex_fh>) {

    # Only lines containing a space followed by upper-case letters are
    # treated as lexicon entries; everything else is ignored.
    next unless $line =~ / [A-Z]+/;

    # Each tab starts a new reading: put it on its own tab-indented line.
    $line =~ s/\t/\n\t/g;

    # First comma-separated field is the word form, the second one holds the
    # readings.  Any further fields are dropped, as in the original code.
    my ($word, $tags) = split /,/, $line;

    # A matching line without a comma yields no readings; skip it rather
    # than storing a key (the whole line) that could never be looked up.
    next unless defined $tags;

    $lex{$word} .= "\t" . $tags;
}

close($lex_fh);

# --- Annotate the input -----------------------------------------------------

while (my $line = <>) {

    # Empty lines and lines starting with '<' are copied through verbatim.
    if ($line !~ /^[^\n<]/) {
        print $line;
        next;
    }

    my $word = $line;
    chomp $word;    # chomp, not chop: keep the last character when the final
                    # line of the input has no trailing newline

    if ($word =~ /^[\W0-9]+$/) {

        # Tokens made up solely of non-word characters and digits get a
        # '$'-prefixed word form and a single reading that repeats the token.
        print qq{"<\$$word>"\n\t"$word"\n};
        next;
    }

    print qq{"<$word>"\n};

    # Try the token as written first, then with its first letter lower-cased.
    # Stored values always start with a tab, so an existence test is
    # equivalent to the truth test used originally.
    my $lc_first = lcfirst $word;
    if (exists $lex{$word}) {
        print $lex{$word};
    }
    elsif (exists $lex{$lc_first}) {
        print $lex{$lc_first};
    }
}