Awk

如何使用 awk 根據上一行和下一行的內容,對欄位值減去或加上 12?

  • May 3, 2022

我有這個數據:

##sequence-region Q75T13 1 641
Q75T13,UniProtKB,Chain,1,641,.,.,.,ID
Q75T13,UniProtKB,Topological domain,1,60,.,.,.,Note=Cytoplasmic
Q75T13,UniProtKB,Transmembrane,61,85,.,.,.,Note=Helical
Q75T13,UniProtKB,Topological domain,86,641,.,.,.,Note=Lumenal


##sequence-region Q9BRR3 1 403
Q9BRR3,UniProtKB,Chain,1,403,.,.,.,ID
Q9BRR3,UniProtKB,Topological domain,1,22,.,.,.,Note=Lumenal
Q9BRR3,UniProtKB,Transmembrane,23,43,.,.,.,Note=Helical
Q9BRR3,UniProtKB,Topological domain,44,259,.,.,.,Note=Cytoplasmic

##sequence-region Q96FM1 1 250
Q96FM1,UniProtKB,Topological domain,120,135,.,.,.,Note=Cytoplasmic
Q96FM1,UniProtKB,Transmembrane,136,156,.,.,.,Note=Helical
Q96FM1,UniProtKB,Topological domain,157,169,.,.,.,Note=Lumenal
Q96FM1,UniProtKB,Transmembrane,170,190,.,.,.,Note=Helical
Q96FM1,UniProtKB,Topological domain,191,250,.,.,.,Note=Lumenal

我想知道 awk 程式碼會是什麼樣子:

對於含有單詞 Lumenal 的行:如果它的前一行含有單詞 Transmembrane,則將第 4 個欄位(以逗號分隔)減去 12,並列印這一行;如果它的下一行含有單詞 Transmembrane,則將第 5 個欄位加上 12,並列印這一行。若兩個條件同時成立,則該行會被列印兩次(各套用一種修改)。最終文件將是:

Q75T13,UniProtKB,Topological domain,74,641,.,.,.,Note=Lumenal
Q9BRR3,UniProtKB,Topological domain,1,34,.,.,.,Note=Lumenal
Q96FM1,UniProtKB,Topological domain,145,169,.,.,.,Note=Lumenal
Q96FM1,UniProtKB,Topological domain,157,181,.,.,.,Note=Lumenal
Q96FM1,UniProtKB,Topological domain,179,250,.,.,.,Note=Lumenal

有人能幫我嗎?我有點卡住了。我正在嘗試使用 awk 和 grep

只需維護一個 3 行的滾動緩衝區(上一行、目前行、下一行),每讀入一行就檢查緩衝區中間那一行及其前後兩行:

$ cat tst.awk
# Prints adjusted copies of every "Lumenal" record that sits directly next to
# a "Transmembrane" record in a comma-separated file:
#   - previous line matches Transmembrane -> field 4 is lowered by 12
#   - next line matches Transmembrane     -> field 5 is raised by 12
# A record that satisfies both conditions is printed twice, once per variant.
#
# The script keeps a three-line window (behind, here, ahead); a line is only
# examined once the line after it has been read.

BEGIN {
    FS = ","
    OFS = ","
}

# Every input line enters the window as the look-ahead line.
{
    ahead = $0
    slide()
}

# One extra slide with an empty look-ahead so the final line is examined too.
END {
    slide()
}

# Load `record` as the current record, add `delta` to field `column`
# and print the rebuilt record.
function emit(record, column, delta) {
    $0 = record
    $column += delta
    print
}

# Examine the middle line of the window, then shift the window by one line.
function slide() {
    if (here ~ /Lumenal/) {
        if (behind ~ /Transmembrane/) {
            emit(here, 4, -12)
        }
        if (ahead ~ /Transmembrane/) {
            emit(here, 5, 12)
        }
    }

    behind = here
    here = ahead
    ahead = ""
}
$ awk -f tst.awk file
Q75T13,UniProtKB,Topological domain,74,641,.,.,.,Note=Lumenal
Q9BRR3,UniProtKB,Topological domain,1,34,.,.,.,Note=Lumenal
Q96FM1,UniProtKB,Topological domain,145,169,.,.,.,Note=Lumenal
Q96FM1,UniProtKB,Topological domain,157,181,.,.,.,Note=Lumenal
Q96FM1,UniProtKB,Topological domain,179,250,.,.,.,Note=Lumenal

嘗試以下命令:

root@u2004:~# cat test
##sequence-region Q75T13 1 641
Q75T13,UniProtKB,Chain,1,641,.,.,.,ID
Q75T13,UniProtKB,Topological domain,1,60,.,.,.,Note=Cytoplasmic
Q75T13,UniProtKB,Transmembrane,61,85,.,.,.,Note=Helical
Q75T13,UniProtKB,Topological domain,86,641,.,.,.,Note=Lumenal


##sequence-region Q9BRR3 1 403
Q9BRR3,UniProtKB,Chain,1,403,.,.,.,ID
Q9BRR3,UniProtKB,Topological domain,1,22,.,.,.,Note=Lumenal
Q9BRR3,UniProtKB,Transmembrane,23,43,.,.,.,Note=Helical
Q9BRR3,UniProtKB,Topological domain,44,259,.,.,.,Note=Cytoplasmic

##sequence-region Q96FM1 1 250
Q96FM1,UniProtKB,Topological domain,120,135,.,.,.,Note=Cytoplasmic
Q96FM1,UniProtKB,Transmembrane,136,156,.,.,.,Note=Helical
Q96FM1,UniProtKB,Topological domain,157,169,.,.,.,Note=Lumenal
Q96FM1,UniProtKB,Transmembrane,170,190,.,.,.,Note=Helical
Q96FM1,UniProtKB,Topological domain,191,250,.,.,.,Note=Lumenal
root@u2004:~# 
root@u2004:~# awk -F, -v OFS=, '
# For each Lumenal record, both adjusted variants are built first; getline
# then pulls in the following record so it can be inspected as well.
{
    while (1) {
        if ($0 ~ /Lumenal/) {
            original = $0

            # Variant with field 4 lowered by 12.
            $4 -= 12
            lowered = $0

            # Restore the record, then build the variant with field 5 raised by 12.
            $0 = original
            $5 += 12
            raised = $0

            if (index(pre, "Transmembrane") > 0) {
                print lowered
            }

            if (getline > 0) {
                # $0 now holds the record that follows the Lumenal one.
                if (index($0, "Transmembrane")) {
                    print raised
                }
                if ($0 ~ /Lumenal/) {
                    # The look-ahead record is Lumenal itself: process it in
                    # the next pass of the loop instead of reading a new one.
                    pre = $0
                    continue
                }
            }
        }
        break
    }
}

# Remember whatever record is current as the previous line for the next cycle.
{ pre = $0 }' test
Q75T13,UniProtKB,Topological domain,74,641,.,.,.,Note=Lumenal
Q9BRR3,UniProtKB,Topological domain,1,34,.,.,.,Note=Lumenal
Q96FM1,UniProtKB,Topological domain,145,169,.,.,.,Note=Lumenal
Q96FM1,UniProtKB,Topological domain,157,181,.,.,.,Note=Lumenal
Q96FM1,UniProtKB,Topological domain,179,250,.,.,.,Note=Lumenal
root@u2004:~#

引用自:https://unix.stackexchange.com/questions/701151