#!/usr/bin/perl
#
# Translate input based on a given hash file
#
# (C) Gal Nov 2003
#
# Reads text from STDIN and replaces strings using a dictionary file given
# with -f.  In the default (tabular) mode each line is split on tabs and
# whole fields are looked up; with -strrep every dictionary key is replaced
# wherever it occurs in the line.  The usage text printed by --help is read
# from the __DATA__ section of this file.

use strict;
use warnings;

# Expected to provide load_hash(); that sub is not defined in this file.
require "$ENV{HOME}/develop/perl/lib/libhash.pl";

# Init
my $fin = \*STDIN;
my $hash_filename;          # -f: dictionary file
my $fieldnum   = 0;         # -n: zero-based index of the only field to translate
my $do_field   = 0;         # true when -n was given
my $back       = 0;         # -b: use the dictionary value => key
my $verbose    = 0;         # -v: report every lookup on STDERR
my $missing;                # -m: replacement for strings not in the dictionary
my $do_missing = 0;         # true when -m was given
my $do_add     = 0;         # -add: print "source DELIMITER target" instead of target
my $delimiter  = "\t";
my %hashtab;                # dictionary: source string => replacement
my $strrep     = 0;         # -strrep: free-text substring replacement

# Return the value following a flag, or die if the command line ends there.
sub _flag_value {
    my ($flag) = @_;
    die("translate_byhash.pl: Missing value for '$flag'. Use --help for help.\n")
        unless @ARGV;
    return shift @ARGV;
}

# Check flags
while (@ARGV) {
    my $arg = shift @ARGV;
    if ($arg eq '--help') {
        print STDOUT <DATA>;
        exit(0);
    }
    elsif ($arg eq '-f') {
        $hash_filename = _flag_value($arg);
    }
    elsif ($arg eq '-n') {
        my $n = _flag_value($arg);
        die("translate_byhash.pl: -n expects a positive integer, got '$n'.\n")
            unless $n =~ /^[1-9][0-9]*$/;
        $fieldnum = $n - 1;     # the command line is 1-based, @row is 0-based
        $do_field = 1;
    }
    elsif ($arg eq '-m') {
        $missing    = _flag_value($arg);
        $do_missing = 1;
    }
    elsif ($arg eq '-b') {
        $back = 1;
    }
    elsif ($arg eq '-v') {
        $verbose = 1;
    }
    elsif ($arg eq '-add') {
        $do_add    = 1;
        $delimiter = _flag_value($arg);
    }
    elsif ($arg eq '-strrep') {
        $strrep = 1;
    }
    else {
        die("translate_byhash.pl: Bad argument '$arg'. Use --help for help.");
    }
}

# Diagnostics go to STDERR so they never mix with the translated output.
if ($verbose) { print STDERR "verbose turned on\n"; }

# Read existing hash table.  A missing, empty or unspecified file leaves the
# table empty, in which case the input passes through untranslated (or is
# filled with the -m string).
if (defined $hash_filename && -s $hash_filename) {
    %hashtab = $back
        ? load_hash_back($hash_filename)
        : load_hash($hash_filename);
}

# Free-text replacement
if ($strrep) {
    # Longest keys first so that a key which is a substring of another key
    # cannot pre-empt it; ties are broken alphabetically so the output does
    # not depend on Perl's randomised hash order.  Empty keys are skipped
    # because an empty pattern has special meaning in s///.
    my @keys = sort { length($b) <=> length($a) || $a cmp $b }
               grep { $_ ne '' } keys %hashtab;

    while (my $line = <$fin>) {
        for my $key (@keys) {
            my $val = defined $hashtab{$key} ? $hashtab{$key} : '';
            # \Q...\E: keys are literal strings, not regular expressions.
            $line =~ s/\Q$key\E/$val/g;
        }
        print $line;
    }
    exit(0);
}

# Tabular replacement
while (my $line = <$fin>) {
    chomp $line;                            # strips only a real line ending

    # Limit -1 keeps trailing empty columns so the column count is preserved.
    my @row = split(/\t/, $line, -1);
    my @out;

    for my $i (0 .. $#row) {
        my $name = $row[$i];

        # Fields outside the -n selection are copied through untouched.
        if ($do_field && $i != $fieldnum) {
            push @out, $name;
            next;
        }

        my $translated;
        if (!defined($hashtab{$name})) {
            if ($do_missing) {
                $translated = $missing;
                if ($verbose) { print STDERR "$name ==> $missing\n"; }
            }
            else {
                $translated = $name;
                if ($verbose) { print STDERR "'$name' not found\n"; }
            }
        }
        else {
            $translated = $hashtab{$name};
            if ($verbose) { print STDERR "$name ==> $hashtab{$name}\n"; }
        }

        # With -add the source string is kept in front of its translation.
        push @out, $do_add ? "$name$delimiter$translated" : $translated;
    }

    # join() puts tabs only between fields, never after the last one.
    print join("\t", @out), "\n";
}

#----------------------------------------------------------------
# load backwards hash from file
#
# Reads a tab-delimited file and builds a reversed dictionary: the
# second column of each row becomes the key and the first column the
# value.  Rows with fewer than two columns are skipped.  When a key
# occurs more than once, the last row wins.
#
#   $file   : path of the dictionary file
#   returns : the reversed dictionary as a flat key/value list
#   dies    : if the file cannot be opened
#----------------------------------------------------------------
sub load_hash_back {
    my ($file) = @_;
    my %table;

    open(my $fh, '<', $file) || die "Cant open $file: $!\n";
    while (my $line = <$fh>) {
        # chomp, not chop: the last row may lack a trailing newline and
        # must not lose its final character.
        chomp($line);
        my @row = split(/\t/, $line);
        next if @row < 2;           # blank or single-column row: no pair
        $table{ $row[1] } = $row[0];
    }
    close($fh);

    return %table;
}

__DATA__
Syntax: translate_byhash.pl -f FILENAME [OPTIONS] < STDIN

 Translate strings in input using a dictionary file.
 input is assumed to be tab delimited.

 OPTIONS are:
  -f FILENAME:    Filename with hash table. Each row contains two
                  fields: key, value
  -m MISSING:     A string to fill when key is not found in hashfile.
                  the default is to leave the string unchanged
  -n N            Translate only the Nth field in each row
                  (N=1 is the first field)
  -b BACK         Translate back by the same hash table
  -add DELIMITER  Instead of replacing each source string with target,
                  print both. delimiter by DELIMITER
  -strrep         In this mode, strings from hashfile are matched and
                  replaced, regardless of columns structure. That is,
                  tabs are treated just as regular characters.

 Example:
   cat in_file | translate_byhash.pl -f directory_file > out_file