130 likes | 236 Views
Deidrey Langat Shinen Lo Mahmoud Rezaei Carissa Tudor. Problem 4: Bioinformatics-Identifying CpG Islands. Hypothesis.
E N D
Deidrey Langat, Shinen Lo, Mahmoud Rezaei, Carissa Tudor. Problem 4: Bioinformatics - Identifying CpG Islands
Hypothesis: Probes that are overexpressed after demethylation treatment will have a significantly higher occurrence of CpG islands around them than probes that are not overexpressed following demethylation; i.e., probe overexpression is correlated with the number of CpG islands found within the 10,000 base pairs upstream of the probe.
Perl: Pseudocode
1. Open the xls file and sort it using Perl.
2. Read in the data for the highest and lowest expression levels based on demethylation treatment.
3. Navigate to the Genome Browser database with parameters set as follows:
   * Groups - "All Tracks"
   * Track - "CpG Islands"
4. Submit each chromosomal location from the xls file as a string to the Genome Browser database.
5. Click on 'Get output' to locate CpG islands.
6. Count the # of results.
7. Record the # of results for each sample.
# Perl: Step 1-Reading Excel file Code:
#!/usr/bin/perl
#
# Reads one column of the first worksheet of an Excel workbook through the
# Win32::OLE automation interface and, for every populated numeric cell,
# prints the cell value together with the window that starts $UPSTREAM_SPAN
# below that value and ends at the value itself.
# NOTE(review): the column presumably holds chromosomal positions; confirm
# against the spreadsheet layout.
use strict;
use warnings;    # replaces the "-w" switch of the original shebang line

use Scalar::Util qw(looks_like_number);
use Win32::OLE qw(in with);
use Win32::OLE::Const 'Microsoft Excel';

$Win32::OLE::Warn = 3;    # die on errors...

# Input location and layout; change these rather than the loop below.
my $WORKBOOK_PATH   = "C:/Documents and Settings/Mahmoud/Desktop/Claremont/forfirst1000highmRNA.xls";
my $POSITION_COLUMN = 55;        # the single column that is inspected
my $FIRST_ROW       = 1;
my $LAST_ROW        = 1000;
my $UPSTREAM_SPAN   = 10_000;    # size of the window subtracted from each value

# get already active Excel application or open new
my $Excel = Win32::OLE->GetActiveObject('Excel.Application')
    || Win32::OLE->new('Excel.Application', 'Quit')
    or die "Cannot obtain an Excel instance: " . Win32::OLE->LastError() . "\n";

# open Excel file
my $Book = $Excel->Workbooks->Open($WORKBOOK_PATH);

# You can dynamically obtain the number of worksheets, rows, and columns
# through the Excel OLE interface. Excel's Visual Basic Editor has more
# information on the Excel OLE interface. Here we just use the first
# worksheet, rows $FIRST_ROW through $LAST_ROW of column $POSITION_COLUMN.

# select worksheet number 1 (you can also select a worksheet by name)
my $Sheet = $Book->Worksheets(1);

ROW:
for my $row ($FIRST_ROW .. $LAST_ROW) {

    # Fetch the cell once; every OLE property access is a COM round trip.
    my $position = $Sheet->Cells($row, $POSITION_COLUMN)->{'Value'};

    # skip empty cells
    next ROW unless defined $position;

    # The window start is computed arithmetically, so a non-numeric cell
    # (for example a header) cannot be used; report it and move on.
    unless (looks_like_number($position)) {
        warn "Skipping non-numeric cell at ($row, $POSITION_COLUMN): $position\n";
        next ROW;
    }

    # print out the contents of a cell; the start of the window is derived
    # from the cell's Value (not its Formula text, which need not be a number)
    printf "At (%d, %d) the value is %s and we looked at upstream from %s to %s\n",
        $row, $POSITION_COLUMN,
        $position, $position - $UPSTREAM_SPAN, $position;
}

# clean up
$Book->Close;
Perl: Step 1-Reading Excel file Output: At (1, 55) the value is 65370390 and we looked at upstream from 65360390 to 65370390 At (2, 55) the value is 30769374 and we looked at upstream from 30759374 to 30769374 At (3, 55) the value is 99484506 and we looked at upstream from 99474506 to 99484506 At (4, 55) the value is 42484609 and we looked at upstream from 42474609 to 42484609
# FASTA Format Code:
#!/bin/perl
#
# Builds two example DNA sequence objects with Bio::Seq and writes both of
# them, in order, to the file sequence.fasta in FASTA format.
use strict;
use warnings;    # replaces the "-w" switch of the original shebang line

use Bio::Seq;
use Bio::SeqIO;

# The records to be written, in output order.
my @seq_objs = (
    Bio::Seq->new(
        -seq        => "aaaatgggggggggggccccgtt",
        -display_id => "#12345",
        -desc       => "example 1",
        -alphabet   => "dna",
    ),
    Bio::Seq->new(
        -seq        => "aaaatgggggggggggcccccccccc",
        -display_id => "#12346",
        -desc       => "example 2",
        -alphabet   => "dna",
    ),
);

# Open the output file exactly once. The leading '>' means "truncate and
# write", so opening it a second time while the first writer is still alive
# would leave two handles competing for the same file.
my $seqio_obj = Bio::SeqIO->new(
    -file   => '>sequence.fasta',
    -format => 'fasta',
);

for my $seq_obj (@seq_objs) {
    $seqio_obj->write_seq($seq_obj);
}

# Flush and release the file handle explicitly.
$seqio_obj->close;
FASTA Format Output:
>#12345 example 1
aaaatgggggggggggccccgtt
>#12346 example 2
aaaatgggggggggggcccccccccc
# Bioperl Code:
#!/local/bin/perl
#
# Retrieves one sequence record from GenBank by identifier using
# Bio::DB::GenBank and prints its id, its description and its sequence.
use strict;
use warnings;    # replaces the "-w" switch of the original shebang line

use Bio::DB::GenBank;

my $ACCESSION = "AB000460";

# Direct method call instead of the indirect-object form "new Class(...)".
my $gb = Bio::DB::GenBank->new(
    -retrievaltype => 'tempfile',
    -format        => 'Fasta',
);

# A single lexical receives the record; the original chained assignment went
# through an undeclared package variable, which does not compile under strict.
my $seq = $gb->get_Seq_by_id($ACCESSION);
die "No sequence was returned for $ACCESSION\n" unless defined $seq;

print $seq->id, "\n";
print $seq->desc(), "Sequence:\n";
print $seq->seq(), "\n";
exit;
CACAATGACATGCAGACCTGCATATTGGAGCTGGACGGAGAAACTGGGCTAATGTGACAGACAGCAACAAGAGTAAGGCAGTTGCTTCGCTATTGAGAGAAAGAACCATATGAAGAAATTTCGGCAGAGGCGGACCGGGAACCTCAGCAGCTGCAGAACTACTGGTCAGAAGTGCGCTACACGGTGCGCTGCATCTACCGCCAGGCAGGAACCCCGCTGGCAGATGACCAGGACCAGTCTCTGGTGCCTGACAAGGAGGGAGTGAAGGAGCTCGTGGATAGGCTCTGCGAGAGGGACCCCTACCAGCTGTACCAGCGTCTGGAACAGCAAGCTCGAGAGTATGTGCTGGAGATGAAGGTCCGCCTGCTCCGGCAGCTGTCGGCTGCGGCCAAGGTGAAGGCACCATCTGGCCTGCAGGGCCCGCCGCAAGCGCACCAGTTCATCTCCCTCCTGCTTGAGGAGTACGGCGCCCTCTGCCAGGCCGCACGCTCCATCAGCACCTTCCTTGGCACTCTGGAAAATGAACACTTGAAAAAGTTCCAAGTGACGTGGGAACTGCATAATAAACACCTGTTTGAAAATCTGGTCTTTTCGGAGCCACTTCTTCAGAGCAACTTGCCCGCACTGGTGTCACAGATCAGGCTAGGAACCACCACACACGACACCTGCAGTGAGGACACATACAGTACCTTGCTGCAGAGGTACCAGCGTTCCGAGGAGGAGCTGCGCAGAGTCGCCGAGGAGTGGCTGGAGTGCCAGAAGAGGATCGACGCCTATGTCGACGAGCAGATGACAATGAAAACCAAGCAGCGCATGTTAACAGAAGACTGGGAGCTTTTTAAACAAAGAAGATTCATTGAAGAACAGTTAACCAATAAGAAAGCAGTTACTGGCGAGAACAACTTCACAGACACCATGAGGCACGTGTTATCGTCCCGGCTGAGCATGCCCGACTGCCCCAACTGCAACTACAGGAGAAGATGTGCTTGCGATGACTGCAGTCTCTCACACATCCTCACGTGTGGTATCATGGACCCCCCCGTCACTGATGACATCCACATTCACCAGCTCCCACTTCAAGTGGATCCTGCTCCTGACTATCTTGCTGAGAGGAGCCCGCCCAGTGTGTCATCTGCAAGCTCGGGGTCCGGCTCCAGCTCTCCCATCACAATTCAGCAGCACCCCAGGCTCATCCTCACAGACAGTGGCTCGGCACCAACTTTTTGTAGTGATGATGAAGATGTTGCACCATTGTCAGCCAAATTTGCTGATATTTATCCATTGAGTAATTATGATGATACCGAGGTGGTGGCCAACATGAATGGAATCCACAGCGAATTGAATGGTGGCGGGGAAAACATGGCCCTGAAGGATGAGTCTCCTCAGATAAGCAGTACCAGCAGTAGTTCCTCAGAAGCTGATGATGAAGAAGCGGACGGCGAGAGTAGTGGGGAGCCCCCAGGGGCCCCGAAGGAAGATGGAGTGCTGGGAAGCAGGAGCCCCAGGACAGAGGAGAGCAAAGCAGACAGTCCACCCCCATCCTACCCAACACAGCAGGCTGAACAAGCTCCAAACACTTGTGAATGTCATGTTTGTAAGCAAGAAGCTTCTGGACTGACACCATCTGCAATGACAGCCGGAGCCCTTCCTCCTGGCCATCAGTTCTTGAGCCCAGAGAAGCCCACACACCCTGCACTGCACCTTTACCCTCACATCCATGGACATGTGCCTTTGCACACTGTTCCACACCTGCCACGCCCTCTCATCCACCCCACCTTGTATGCAACGCCCCCCTTCACACACAGTAAGGCTTTACCGCCAGCACCTGTTCAGAATCACACAAATAAGCATCAGGTATTCAATGCATCTCTTCAAGACCATATTTATCCGAGCTGTTTTGGGAATACTCCAGAGTGGAATAGTTCTAAATTTATAAGTCTTTGGGGATCAGAAGTGATGAATGATAAGAACTGGAATCCTGGCACTTTCTTGCCAGATAC
AATTTCTGGGAGTGAAATATTAGGGCCAACACTCTCAGAAACAAGACCGGAAGCCCTTCCACCTCCATCTAGCAATGAAACACCTGCAGTCTCGGATAGTAAAGAGAAAAAGAATGCTGCAAAAAAGAAATGTTTATACAATTTCCAAGATGCTTTCATGGAAGCAAATAAAGTTGTCATGGCCACGTCATCAGCCACGTCCTCTGTGTCCTGCACAGCTACCACAGTGCAGTCCAGCAACAGCCAGTTCAGAGTGTCATCCAAGAGACCTCCTTCAGTAGGTGACGTGTTTCATGGCATCAGCAAGGAGGACCACAGACACTCGGCCCCAGCCGCCCCGAGGAATAGCCCCACGGGCTTGGCCCCCCTCCCAGCGCTCTCGCCTGCTGCGCTGTCACCTGCTGCGCTCTCACCTGCCTCCACACCTCACCTTGCAAATCTTGCAGCCCCATCATTCCCCAAAACAGCAACCACAACTCCTGGGTTTGTGGACACACGCAAGAGTTTCTGTCCTGCACCCCTACCCCCGGCCACAGATGGCTCCATTAGCGCCCCTCCAAGTGTCTGCAGTGACCCTGACTGCGAAGGGCACCGCTGCGAGAATGGTGTCTACGACCCACAGCAGGATGATGGGGACGAGAGTGCAGATGAGGACAGCTGCTCTGAGCACAGCTCCAGCACCTCGACCTCCACCAACCAGAAGGAGGGCAAGTACTGCGACTGCTGCTACTGCGAATTCTTTGGGCACGGCGGGCCTCCAGCTGCACCAACAAGTAGAAATTATGCAGAAATGAGGGAAAAGCTTCGCTTACGGCTGACCAAGAGGAAAGAGGAGCAACCTAAAAAAATGGACCAGATCTCAGAAAGGGAAAGCGTCGTTGACCATCGGAGGGTGGAGGATTTGTTGCAGTTTATAAATAGCTCCGAAACCAAACCAGTGAGCAGCACGCGTGCAGCGAAGCGAGCAAGGCATAAGCAAAGGAAGCTGGAGGAGAAAGCTCGCCTAGAAGCAGAGGCCAGGGCCCGGGAGCACCTGCACCTCCAGGAGGAGCAGAGGCGGCGGGAGGAGGAGGAGGATGAGGAAGAAGAGGAGGATCGTTTCAAGGAGGAATTTCAGCGGCTTCAGGAGCTTCAGAAGCTAAGAGCTGTAAAAAAGAAGAAGAAGGAGAGGCCAAGTAAAGACTGCCCCAAGTTGGACATGCTCACTAGAAATTTCCAGGCAGCAACAGAGTCTGTTCCTAACTCTGGAAACATCCACAATGGCTCACTAGAGCAAACTGAAGAACCAGAAACCTCTTCTCACTCCCCATCCAGGCATATGAACCACTCAGAGCCCAGGCCAGGGCTAGGGGCTGATGGGGATGCTGCAGACCCCGTCGACACCAGAGACTCCAAATTTCTCCTCCCCAAGGAGGTGAATGGGAAGCAGCATGAGCCACTCTCTTTTTTCTTCGACATCATGCAGCACCATAAAGAAGGAAATGGCAAGCAGAAGCTGAGGCAGACCAGCAAGGCCAGCAGCGAGCCAGCGAGGAGGCCCACAGAGCCCCCCAAGGCCACAGAGGGGCAGTCCAAGCCCCGGGCCCAGACTGAGTCAAAGGCTAAGGTGGTCGACCTCATGTCCATCACAGAGCAGAAAAGAGAGGAGAGAAAAGTCAACAGTAATAACAATAACAAAAAGCAGCTGAACCACATCAAGGACGAAAAGTCAAACCCAACCCCTATGGAGCCCACCTCTCCCGGTGAGCATCAGCAGAACAGCAAGCTGGTGCTGGCAGAGTCCCCTCAGCCAAAGGGCAAGAACAAGAAAAATAAGAAGAAGAAAGGAGACAGAGTCAACAATTCAATTGATGGAGTTTCGCTCTTGTTGCCCAGTCTGGGGTACAATGGTGCAATCTTGGCTCACTGCAACCTTCGCCTCCCAGGTTCAAGCGATTGTGCTGCCTCAGCCTCCCAAGTAGTTGGAATTACAGATGATGTCTTTCTACCTAAAGATATTG
ACCTAGACAGTGTGGATATGGATGAGACAGAGAGGGAAGTGGAATATTTCAAAAGGTTCTGCTTGGATTCTGCTAGACAGACCCGACAAAGACTGTCTATCAACTGGTCCAATTTTAGCTTGAAAAAAGCCACCTTTGCTGCCCACTGAATGAGGACTCCCTGGAGAGGGACACGCGAGAGGCAGGCCAGGCTGCACCACCCCAAGAGCCACGCCCCTCGCTGGCGCCCCAGAGCCGTGGTGCTTGCCAAGGGCTGTGCGGAGCTGGTGCTGCCTGAAACCCCAGACCGAGAAGTTGATGCTCGGCCCACGCCGTTAGCTCGTGTGCGTGTAGTCTGTGCGTGAGACTCCTTCGATTGTAGCTCTGTGCTGTCGGATTGGAACAGTAGTTCCCGCCAAGTCCTCCCACCACCGCGGCCTCGGAGGCCTGGGCCGTGGCCAGATAGGAGTTTGCATCATCCACGTGGCTCCGTTGCCTCTGCATTGCGCCCTGTCCTGTCATGTGTCCTCACCGGGGTATCGGCCGTCACTCAGCTCTCCTGTGCCCCTGCGTCTCACCCTAGGCGGGCTGGGCGGGGCAGGCCTCCTTTGTTCTCCACAATCTACTGTCTCCGAGTGTACACGTTGCGCTGTTTGTGTTTGATCCCCCTGACTTGTAGCCAGCTTGTGTAAGATCCCTTGCAGAACGAGAAAGTTAAAAACAAGCCCACCCAGTACTCACACCATCAAGTCTGTTATAGAGTGTACGACTGTATTAACACGGAGGCCTGCCTGGCTACTTTTTTAACATATTGTTAAGTAATATTAAAATCATGTCTTTCTTTTTGAAAGATGCACAATGACATGCAGACCTGCATATTGGAGCTGGACGGAGAAACTGGGCTAATGTGACAGACAGCAACAAGAGTAAGGCAGTTGCTTCGCTATTGAGAGAAAGAACCATATGAAGAAATTTCGGCAGAGGCGGACCGGGAACCTCAGCAGCTGCAGAACTACTGGTCAGAAGTGCGCTACACGGTGCGCTGCATCTACCGCCAGGCAGGAACCCCGCTGGCAGATGACCAGGACCAGTCTCTGGTGCCTGACAAGGAGGGAGTGAAGGAGCTCGTGGATAGGCTCTGCGAGAGGGACCCCTACCAGCTGTACCAGCGTCTGGAACAGCAAGCTCGAGAGTATGTGCTGGAGATGAAGGTCCGCCTGCTCCGGCAGCTGTCGGCTGCGGCCAAGGTGAAGGCACCATCTGGCCTGCAGGGCCCGCCGCAAGCGCACCAGTTCATCTCCCTCCTGCTTGAGGAGTACGGCGCCCTCTGCCAGGCCGCACGCTCCATCAGCACCTTCCTTGGCACTCTGGAAAATGAACACTTGAAAAAGTTCCAAGTGACGTGGGAACTGCATAATAAACACCTGTTTGAAAATCTGGTCTTTTCGGAGCCACTTCTTCAGAGCAACTTGCCCGCACTGGTGTCACAGATCAGGCTAGGAACCACCACACACGACACCTGCAGTGAGGACACATACAGTACCTTGCTGCAGAGGTACCAGCGTTCCGAGGAGGAGCTGCGCAGAGTCGCCGAGGAGTGGCTGGAGTGCCAGAAGAGGATCGACGCCTATGTCGACGAGCAGATGACAATGAAAACCAAGCAGCGCATGTTAACAGAAGACTGGGAGCTTTTTAAACAAAGAAGATTCATTGAAGAACAGTTAACCAATAAGAAAGCAGTTACTGGCGAGAACAACTTCACAGACACCATGAGGCACGTGTTATCGTCCCGGCTGAGCATGCCCGACTGCCCCAACTGCAACTACAGGAGAAGATGTGCTTGCGATGACTGCAGTCTCTCACACATCCTCACGTGTGGTATCATGGACCCCCCCGTCACTGATGACATCCACATTCACCAGCTCCCACTTCAAGTGGATCCTGCTCCTGACTATCTTGCTGAGAGGAGCCCGCCCAGTGTGTCATCTGCAAGCTCGGGGTCCGGCTCCAGCTCTCCCATC
ACAATTCAGCAGCACCCCAGGCTCATCCTCACAGACAGTGGCTCGGCACCAACTTTTTGTAGTGATGATGAAGATGTTGCACCATTGTCAGCCAAATTTGCTGATATTTATCCATTGAGTAATTATGATGATACCGAGGTGGTGGCCAACATGAATGGAATCCACAGCGAATTGAATGGTGGCGGGGAAAACATGGCCCTGAAGGATGAGTCTCCTCAGATAAGCAGTACCAGCAGTAGTTCCTCAGAAGCTGATGATGAAGAAGCGGACGGCGAGAGTAGTGGGGAGCCCCCAGGGGCCCCGAAGGAAGATGGAGTGCTGGGAAGCAGGAGCCCCAGGACAGAGGAGAGCAAAGCAGACAGTCCACCCCCATCCTACCCAACACAGCAGGCTGAACAAGCTCCAAACACTTGTGAATGTCATGTTTGTAAGCAAGAAGCTTCTGGACTGACACCATCTGCAATGACAGCCGGAGCCCTTCCTCCTGGCCATCAGTTCTTGAGCCCAGAGAAGCCCACACACCCTGCACTGCACCTTTACCCTCACATCCATGGACATGTGCCTTTGCACACTGTTCCACACCTGCCACGCCCTCTCATCCACCCCACCTTGTATGCAACGCCCCCCTTCACACACAGTAAGGCTTTACCGCCAGCACCTGTTCAGAATCACACAAATAAGCATCAGGTATTCAATGCATCTCTTCAAGACCATATTTATCCGAGCTGTTTTGGGAATACTCCAGAGTGGAATAGTTCTAAATTTATAAGTCTTTGGGGATCAGAAGTGATGAATGATAAGAACTGGAATCCTGGCACTTTCTTGCCAGATACAATTTCTGGGAGTGAAATATTAGGGCCAACACTCTCAGAAACAAGACCGGAAGCCCTTCCACCTCCATCTAGCAATGAAACACCTGCAGTCTCGGATAGTAAAGAGAAAAAGAATGCTGCAAAAAAGAAATGTTTATACAATTTCCAAGATGCTTTCATGGAAGCAAATAAAGTTGTCATGGCCACGTCATCAGCCACGTCCTCTGTGTCCTGCACAGCTACCACAGTGCAGTCCAGCAACAGCCAGTTCAGAGTGTCATCCAAGAGACCTCCTTCAGTAGGTGACGTGTTTCATGGCATCAGCAAGGAGGACCACAGACACTCGGCCCCAGCCGCCCCGAGGAATAGCCCCACGGGCTTGGCCCCCCTCCCAGCGCTCTCGCCTGCTGCGCTGTCACCTGCTGCGCTCTCACCTGCCTCCACACCTCACCTTGCAAATCTTGCAGCCCCATCATTCCCCAAAACAGCAACCACAACTCCTGGGTTTGTGGACACACGCAAGAGTTTCTGTCCTGCACCCCTACCCCCGGCCACAGATGGCTCCATTAGCGCCCCTCCAAGTGTCTGCAGTGACCCTGACTGCGAAGGGCACCGCTGCGAGAATGGTGTCTACGACCCACAGCAGGATGATGGGGACGAGAGTGCAGATGAGGACAGCTGCTCTGAGCACAGCTCCAGCACCTCGACCTCCACCAACCAGAAGGAGGGCAAGTACTGCGACTGCTGCTACTGCGAATTCTTTGGGCACGGCGGGCCTCCAGCTGCACCAACAAGTAGAAATTATGCAGAAATGAGGGAAAAGCTTCGCTTACGGCTGACCAAGAGGAAAGAGGAGCAACCTAAAAAAATGGACCAGATCTCAGAAAGGGAAAGCGTCGTTGACCATCGGAGGGTGGAGGATTTGTTGCAGTTTATAAATAGCTCCGAAACCAAACCAGTGAGCAGCACGCGTGCAGCGAAGCGAGCAAGGCATAAGCAAAGGAAGCTGGAGGAGAAAGCTCGCCTAGAAGCAGAGGCCAGGGCCCGGGAGCACCTGCACCTCCAGGAGGAGCAGAGGCGGCGGGAGGAGGAGGAGGATGAGGAAGAAGAGGAGGATCGTTTCAAGGAGGAATTTCAGCGGCTTCAGGAGCTTCAGAAGCTAAGAGCTGTAAAAAAGAAGAAGAAGGA
GAGGCCAAGTAAAGACTGCCCCAAGTTGGACATGCTCACTAGAAATTTCCAGGCAGCAACAGAGTCTGTTCCTAACTCTGGAAACATCCACAATGGCTCACTAGAGCAAACTGAAGAACCAGAAACCTCTTCTCACTCCCCATCCAGGCATATGAACCACTCAGAGCCCAGGCCAGGGCTAGGGGCTGATGGGGATGCTGCAGACCCCGTCGACACCAGAGACTCCAAATTTCTCCTCCCCAAGGAGGTGAATGGGAAGCAGCATGAGCCACTCTCTTTTTTCTTCGACATCATGCAGCACCATAAAGAAGGAAATGGCAAGCAGAAGCTGAGGCAGACCAGCAAGGCCAGCAGCGAGCCAGCGAGGAGGCCCACAGAGCCCCCCAAGGCCACAGAGGGGCAGTCCAAGCCCCGGGCCCAGACTGAGTCAAAGGCTAAGGTGGTCGACCTCATGTCCATCACAGAGCAGAAAAGAGAGGAGAGAAAAGTCAACAGTAATAACAATAACAAAAAGCAGCTGAACCACATCAAGGACGAAAAGTCAAACCCAACCCCTATGGAGCCCACCTCTCCCGGTGAGCATCAGCAGAACAGCAAGCTGGTGCTGGCAGAGTCCCCTCAGCCAAAGGGCAAGAACAAGAAAAATAAGAAGAAGAAAGGAGACAGAGTCAACAATTCAATTGATGGAGTTTCGCTCTTGTTGCCCAGTCTGGGGTACAATGGTGCAATCTTGGCTCACTGCAACCTTCGCCTCCCAGGTTCAAGCGATTGTGCTGCCTCAGCCTCCCAAGTAGTTGGAATTACAGATGATGTCTTTCTACCTAAAGATATTGACCTAGACAGTGTGGATATGGATGAGACAGAGAGGGAAGTGGAATATTTCAAAAGGTTCTGCTTGGATTCTGCTAGACAGACCCGACAAAGACTGTCTATCAACTGGTCCAATTTTAGCTTGAAAAAAGCCACCTTTGCTGCCCACTGAATGAGGACTCCCTGGAGAGGGACACGCGAGAGGCAGGCCAGGCTGCACCACCCCAAGAGCCACGCCCCTCGCTGGCGCCCCAGAGCCGTGGTGCTTGCCAAGGGCTGTGCGGAGCTGGTGCTGCCTGAAACCCCAGACCGAGAAGTTGATGCTCGGCCCACGCCGTTAGCTCGTGTGCGTGTAGTCTGTGCGTGAGACTCCTTCGATTGTAGCTCTGTGCTGTCGGATTGGAACAGTAGTTCCCGCCAAGTCCTCCCACCACCGCGGCCTCGGAGGCCTGGGCCGTGGCCAGATAGGAGTTTGCATCATCCACGTGGCTCCGTTGCCTCTGCATTGCGCCCTGTCCTGTCATGTGTCCTCACCGGGGTATCGGCCGTCACTCAGCTCTCCTGTGCCCCTGCGTCTCACCCTAGGCGGGCTGGGCGGGGCAGGCCTCCTTTGTTCTCCACAATCTACTGTCTCCGAGTGTACACGTTGCGCTGTTTGTGTTTGATCCCCCTGACTTGTAGCCAGCTTGTGTAAGATCCCTTGCAGAACGAGAAAGTTAAAAACAAGCCCACCCAGTACTCACACCATCAAGTCTGTTATAGAGTGTACGACTGTATTAACACGGAGGCCTGCCTGGCTACTTTTTTAACATATTGTTAAGTAATATTAAAATCATGTCTTTCTTTTTGAAAGATG