
(*  
    Mathematica script to draw sequence logos
    Written by Thomas Simonson on January 8th, 2005
    Comments/queries to thomas.simonson[at]polytechnique.fr
    This file: logo.ma ; available from http://biology.polytechnique.fr/biocomputing
    Operating-system independent: should run on Linux, NextStep, Mac OS, and even Windoze.
    This script may be freely distributed and modified.
    Use : math < logo.ma
          The logo is output as a TIFF file, logo.tif
          The data should be in a file logo.dat in the current directory.
          Data format :  - 1st line has N nucleotide names (usually N = 4 fields, eg A C G T).
                         - the following lines correspond to the successive positions in the motif;                       
                           each line contains the N corresponding nucleotide probabilities.
          To leave a gap in the final logo, simply include one or more lines of zero probabilities in logo.dat.
    Example files: logo.dat, logo.tif, logo2.dat, logo2.tif
    Drawings of the four nucleotide letters are given in the files: bigA.tif, bigC.tif, bigG.tif, bigT.tif.
    A white space and a generic nucleotide are also provided, coded by W and N, respectively. (See logo2.dat.)
    The letter drawings were produced with The Gimp, using a 72 point Charter font, and scaled to give a 100 pt width.
  *)

      (* some function definitions *)
height[i_Integer] := Sort[h[[i]], Greater];                             (* nucleotide heights, sorted according to their magnitude *)
order[i_Integer] := Ordering[h[[i]], n, Greater];                       (* sorting key *)
nnuc[i_Integer] := nuc[[order[i]]]                                      (* sort nucleotides according to their probability  *)
ypos[i_Integer, 0] := 0 ; ypos[i_Integer, j_Integer] := ypos[i, j - 1] + height[i][[j]]  (* vertical positions of stacked letters *)
letters[i_Integer, j_Integer] := Import["big" <> nnuc[i][[j]] <> ".tif", "TIFF"]         (* read in letter drawings *)   
pixels[i_Integer, j_Integer] := letters[i, j][[1, 1]]/Max[letters[i, j][[1, 1]]]         (* extract raster array *)
      (* now read the data and make plot *)
tmp = ReadList["logo.dat", Word];                                                       
n=1; While[tmp[[n]] == "A" || tmp[[n]] == "C" || tmp[[n]] == "G" || tmp[[n]] == "T" || tmp[[n]] == "N" || tmp[[n]] == "W", n++]; n-- ;
nuc = Take[tmp, n]; h = Partition[Map[ToExpression[#]&, Drop[tmp, n]], n]; npos = Length[h];
{dx, dy} = Dimensions[pixels[1, 1]]; 
p=Show[Graphics[Flatten[Table[Table[ Raster[pixels[i,j], {{(i-1)*dx,ypos[i,j-1]},{i*dx,ypos[i,j]}}], {j,1,n}],{i,1,npos}]]],
             Frame->True,FrameTicks->None];           
Export["logo.tif",p,"TIFF"]; 


