#!/usr/bin/perl -w
#
#
#
use strict;
no strict "subs";
use XML::Smart;
use IO::File;
use Data::Dumper;
use Digest::MD5 qw(md5_hex);

# simple postprocess NCBI BLAST -m 7
# for diff based signature comparisons
#
# Copyright (c) 2005 Scalable Informatics LLC
# Dual licensed:
#   For GPL and open source projects, this code carries a GPL v2.0 License
#   For commercially redistributed and closed source projects, you will need
#   to obtain an appropriate closed source license from Scalable Informatics LLC
#
# Support for this code is available for a fee from Scalable Informatics LLC
# http://www.scalableinformatics.com
# landman@scalableinformatics.com
#
# Usage: pass the path of a BLAST XML output file as the only argument.
#
# The input may hold several XML documents concatenated together, each one
# introduced by its own <?xml version="1.0"?> declaration.  For every hit
# of every document one line is written to STDOUT:
#
#     md5(query-def):md5(Hit_id):Hsp_evalue
#
# Hits are ordered by Hit_id within each document so that two runs over
# equivalent BLAST output can be compared with a plain diff.

my $in = shift;
die "usage: $0 <blast-xml-file>\n" unless defined $in;

# Pass the mode separately from the file name so that special characters
# in the name cannot be misread as part of the open mode, and stop right
# away if the file cannot be read instead of silently printing nothing.
my $file = IO::File->new;
$file->open($in, '<')
    or die "cannot open '$in' for reading: $!\n";
my $all = join("", $file->getlines);
$file->close;

# Cut the input at every XML declaration.  Whatever precedes the first
# declaration (normally the empty string) is not a document and is dropped.
my @sub_documents = split(/<\?xml version="1\.0"\?>/, $all);
shift @sub_documents;

my @results;
foreach my $document (@sub_documents) {
    my $xml = XML::Smart->new($document);

    # The query definition line is hashed so every record has a short,
    # fixed-width key.
    my $query = md5_hex($xml->{BlastOutput}{'BlastOutput_query-def'});

    # Calling the node with '@' asks XML::Smart for the list of all <Hit>
    # nodes at this path rather than just the first one.
    my @hits = $xml->{'BlastOutput'}{'BlastOutput_iterations'}{'Iteration'}
                   {'Iteration_hits'}{'Hit'}->('@');

    # Hit_id is a textual identifier, so it has to be compared as a string.
    # A numeric comparison treats every non-numeric id as 0 and leaves the
    # hits unordered (plus a warning per comparison under -w).
    foreach my $hit ( sort { $a->{'Hit_id'} cmp $b->{'Hit_id'} } @hits ) {
        push @results,
            sprintf("%s:%s:%s",
                    $query,
                    md5_hex($hit->{'Hit_id'}),
                    $hit->{'Hit_hsps'}{'Hsp'}{'Hsp_evalue'});
    }
}

foreach my $record (@results) {
    printf "%s\n", $record;
}