Count number of different occurrences in a string by UNIX along one column into a file Count number of different occurrences in a string by UNIX along one column into a file unix unix

Count number of different occurrences in a string by UNIX along one column into a file


Your input doesn't match your output, so we're all just guessing, but this might be what you want:

$ cat tst.awk
BEGIN { FS=OFS="\t" }
{
    delete cnt
    split($2,tmp,/ /)
    for (i in tmp) {
        str = tmp[i]
        cnt[str]++
    }
    printf "%s", $0
    sep = OFS
    for (str in cnt) {
        printf "%s%s=%d", sep, str, cnt[str]
        sep = ";"
    }
    print ""
}

Depending on what your input really is, the above will output one of the following:

$ cat file
rs12255619 A/C chr10    AA AA AC AA AA AA AA AA AA AC AA
rs7909677 A/G chr10     AA AA AA AA AA AA AA AA AA AA AA
$ awk -f tst.awk file
rs12255619 A/C chr10    AA AA AC AA AA AA AA AA AA AC AA        AA=9;AC=2
rs7909677 A/G chr10     AA AA AA AA AA AA AA AA AA AA AA        AA=11

$ cat file
rs12255619 A/C chr10    AA AA AC AA AA AA AA AA AA AC AA
rs7909677 A/G chr10     AA AA AA AA AA AA AA AA AA AA CC
$ awk -f tst.awk file
rs12255619 A/C chr10    AA AA AC AA AA AA AA AA AA AC AA        AA=9;AC=2
rs7909677 A/G chr10     AA AA AA AA AA AA AA AA AA AA CC        AA=10;CC=1


Something like this?

$ awk '{for(i=4;i<=NF;i++) c[$i]++;
        for(k in c) {s=s sep k"="c[k]; sep=";"; c[k]=0}
        $NF=$NF OFS s; s=sep=""}1' file | column -t
rs12255619  A/C  chr10  AA  AA  AC  AA  AA  AA  AA  AA  AA  AC  AA  AA=9;AC=2
rs7909677   A/G  chr10  AA  AA  AA  AA  AA  AA  AA  AA  AA  AA  AA  AA=11;AC=0

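Note that the set of reported keys grows as the file is read: only the keys observed up to the current row are printed, and keys seen on earlier rows are kept with their counts reset to 0 (hence AC=0 on the second line above). For example, if CC first appeared in the second row, its count would not be listed on the first line.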


You could do it in Perl:

perl -lpe '$a{$_}++ for /\b[A-Z]{2}\b/g;
          $_.=" ".join(";",map{"$_=$a{$_}"}keys%a);
          %a = map{$_=>0}keys%a' file

which produces:

rs12255619 A/C chr10    AA AA AC AA AA AA AA AA AA AC AA AA=9;AC=2
rs7909677 A/G chr10     AA AA AA AA AA AA AA AA AA AA CC AA=10;CC=1;AC=0

For the new requirement (the counts are cleared after every line, so values absent from a line are not listed):

perl -lpe '$a{$_}++ for /\b[A-Z]{2}\b/g;
          $_.=" ".join(";",map{"$_=$a{$_}"}keys%a);
          undef %a' file

which produces:

rs12255619 A/C chr10    AA AA AC AA AA AA AA AA AA AC AA AC=2;AA=9
rs7909677 A/G chr10     AA AA AA AA AA AA AA AA AA AA CC CC=1;AA=10