package lab.seq;

import java.io.*;

import org.biojava.bio.seq.*;
import org.biojava.bio.symbol.*;

/** Interface to Jim Kent's .nib file format.
 *
 * A .nib file, as read by this class, is laid out as follows: bytes 0-3 are
 * skipped (a signature, which this class does not check), bytes 4-7 hold
 * the number of bases as a little-endian 32-bit integer, and from byte 8
 * onwards each byte packs two bases, the earlier base in the high nibble.
 *
 * This should return exactly what Jim Kent's nibFrag tool returns;
 * this hasn't been tested rigorously, though.
 *
 * Coordinates are 0-based and half-open: a range (start, end) covers the
 * bases start, start+1, ..., end-1.
 *
 * FIXME: this should probably get coordinates from some Range object.
 *
 * Distributed under the GNU Library General Public License, v.2 or greater.
 *
 * @author jburdick
 */
public class NibFile implements Closeable {

    /** Byte offset of the little-endian base count within the file. */
    private static final int LENGTH_OFFSET = 4;

    /** Byte offset of the first byte of packed bases. */
    private static final int DATA_OFFSET = 8;

    /** Mapping from nibble values to DNA characters, for the plus strand.
     * NOTE(review): the UCSC format description appears to say the high bit
     * of a nibble marks a *masked* (conventionally lower-case) base, which
     * would be the opposite case assignment from this table - confirm
     * against nibFrag output before changing. */
    private static final char[] PLUS_STRAND_BASES = {
        't', 'c', 'a', 'g', 'n', '-', '-', '-',
        'T', 'C', 'A', 'G', 'N', '-', '-', '-'
    };

    /** The same mapping, complemented, for the minus strand. */
    private static final char[] MINUS_STRAND_BASES = {
        'a', 'g', 't', 'c', 'n', '-', '-', '-',
        'A', 'G', 'T', 'C', 'N', '-', '-', '-'
    };

    /** The .nib file we're reading from. */
    RandomAccessFile f;

    /** The length of the file, in base pairs. */
    private int length;

    /** Creates a new instance of NibFile, reading from an already-open file.
     *
     * @param f the open .nib file; it is closed by {@link #close()}
     * @throws IOException if the base count can't be read from the header
     */
    public NibFile(RandomAccessFile f)
    throws IOException {
        this.f = f;
        readLength();
    }

    /** Creates a new instance of NibFile, opening the file in question.
     *
     * @param filename path of the .nib file, which is opened read-only
     * @throws IOException if the file can't be opened, or the base count
     *         can't be read from the header
     */
    public NibFile(String filename) throws IOException {
        this.f = new RandomAccessFile(filename, "r");
        boolean headerRead = false;
        try {
            readLength();
            headerRead = true;
        } finally {
            // We opened the file ourselves, so don't leak the handle if
            // the header turns out to be unreadable.
            if (!headerRead) {
                try {
                    f.close();
                } catch (IOException ignored) {
                    // the exception from readLength() is the one to report
                }
            }
        }
    }

    /** Reads in the number of bases from the file. */
    private void readLength() throws IOException {
        // The count is stored little-endian, but readInt() is big-endian,
        // so swap the byte order after reading.
        f.seek(LENGTH_OFFSET);
        length = Integer.reverseBytes(f.readInt());
    }

    /** The number of nucleotides in this file. */
    public int getLength() {
        return length;
    }

    /** Gets the sequence at some range.
     * This is synchronized, because it jumps around in the file.
     * However, there shouldn't be any problem with creating
     * multiple NibFile objects reading from the same file, which
     * can read concurrently.
     *
     * @param start 0-based index of the first base to return
     * @param end 0-based index just past the last base to return; may be
     *        equal to {@link #getLength()} to read through the last base
     * @param plusStrand if true, the bases are returned as stored; if
     *        false, the reverse complement is returned
     * @return the (end - start) bases in the range, as a string
     * @throws IllegalArgumentException if the range is not within the file
     * @throws IOException if the file can't be read, or is shorter than
     *         its header claims
     */
    public synchronized String getSequenceByLocation
            (int start, int end, boolean plusStrand)
            throws IOException {

        if (start < 0)
            throw new IllegalArgumentException("start needs to be >= 0");
        if (end < start)
            throw new IllegalArgumentException("start needs to be <= end");
        if (start >= length)
            throw new IllegalArgumentException("start needs to be < length");
        // end is exclusive, so end == length is the way to ask for the
        // final base of the file
        if (end > length)
            throw new IllegalArgumentException("end needs to be <= length");

        int count = end - start;
        if (count == 0)
            return "";

        // Read every byte that holds part of the range with one call;
        // RandomAccessFile does no buffering of its own, so reading a
        // byte at a time would mean one OS-level read per two bases.
        int firstByte = start / 2;
        int lastByte = (end - 1) / 2;
        byte[] packed = new byte[lastByte - firstByte + 1];
        f.seek((long) firstByte + DATA_OFFSET);
        f.readFully(packed);

        char[] table = plusStrand ? PLUS_STRAND_BASES : MINUS_STRAND_BASES;
        char[] bases = new char[count];
        for (int i = 0; i < count; i++) {
            int position = start + i;
            int b = packed[position / 2 - firstByte] & 0xFF;
            // even positions are in the high nibble, odd ones in the low
            int nibble = (position % 2 == 0) ? (b >> 4) : (b & 0x0F);
            // the minus strand is filled in back to front, which,
            // together with the complemented table, gives the
            // reverse complement
            int target = plusStrand ? i : count - 1 - i;
            bases[target] = table[nibble];
        }
        return new String(bases);
    }

    /** Gets the sequence at some range, as a BioJava DNA SymbolList.
     * The arguments are as for
     * {@link #getSequenceByLocation(int, int, boolean)}, whose result is
     * passed to DNATools.createDNA.
     *
     * @throws IllegalArgumentException if the range is not within the file
     * @throws IOException if the file can't be read
     * @throws IllegalSymbolException if thrown by DNATools.createDNA
     */
    public SymbolList getSymbolListByLocation(int start, int end, boolean plusStrand)
    throws IOException, IllegalSymbolException {
        return DNATools.createDNA(getSequenceByLocation(start, end, plusStrand));
    }

    /** Closes the underlying file. This object can't be read from
     * afterwards.
     *
     * @throws IOException if closing the file fails
     */
    @Override
    public synchronized void close() throws IOException {
        f.close();
    }
}