Shell

sed XML 解析選擇帶有特殊字元的標籤之間的文本,同時刪除其他塊

  • February 5, 2021

我正在嘗試解析一個由 <JOB> 和 </JOB> 之間的塊組成的 XML。我在 while 循環中進行解析:每次讀取 <JOB> 和 </JOB> 之間的一個塊,將該塊重定向到文件,同時把它從輸入中刪除並退出。我嘗試使用下面的命令,但它似乎也選中了符合相同模式的所有其他塊。我已經為每個特殊字元加上了轉義字元,卻仍然得不到所需的輸出,請指出我哪裡出錯了:

sed '/\<JOB/,/\<\/JOB/!d;/\<\/JOB/q' /tmp/ftp_final >> /tmp/result_1

文本範例:

<JOB 
    ACTIVE_TILL="20141124" 
    APPLICATION="CTDS0320" 
    APPL_FORM="AFT" 
    APPL_TYPE="FILE_TRANS" 
    APPL_VER="6.1.01" 
    APR="1" 
    AUG="1" 
    AUTHOR="wyy" 
    AUTOARCH="0" 
    CHANGE_DATE="20190403" 
    CHANGE_TIME="141554" 
    CHANGE_USERID="ecsadmin" 
    CM_VER="610" 
    CONFIRM="0" 
    CREATION_DATE="20190328" 
    CREATION_TIME="105555" 
    CREATION_USER="singhm7" 
    CRITICAL="0" 
    CYCLIC="0" 
    CYCLIC_TOLERANCE="0" 
    CYCLIC_TYPE="Interval" 
    DAYS_AND_OR="OR" 
    DEC="1" 
    DESCRIPTION="Processes Source data files FTP from Source System" 
    DOCLIB="/prod/appl/ctds/Doc" 
    DOCMEM="energent_billing_delta.doc" 
    FEB="1" 
    GROUP="@CTDS-BW" 
    IND_CYCLIC="START" 
    INTERVAL="00000M" 
    JAN="1" 
    JOBNAME="energent_billing_delta-FTP" 
    JUL="1" 
    JUN="1" 
    MAR="1" 
    MAXDAYS="0" 
    MAXRERUN="0" 
    MAXRUNS="0" 
    MAXWAIT="7" 
    MAY="1" 
    MEMLIB="Not in use for application jobs" 
    MEMNAME="energent_billing_delta-FTP" 
    MULTY_AGENT="N" 
    NODEID="vprv0320" 
    NOV="1" 
    OCT="1" 
    OWNER="ctds_energent" 
    PARENT_TABLE="@CTDS" 
    PRIORITY="AA" 
    RETRO="0" 
    RULE_BASED_CALENDAR_RELATIONSHIP="OR" 
    SEP="1" 
    SHIFT="IGNOREJOB" 
    SHIFTNUM="+00" 
    SYSDB="0" 
    TASKTYPE="Job" 
    USE_INSTREAM_JCL="N" 
    WEEKSCAL="BCD-Y">
     <INCOND AND_OR="AND" NAME="CTDS0320-ENERGENT-BILLING-DELTA-OK" ODATE="ODAT"/>
     <OUTCOND NAME="CTDS0320-ENERGENT-BILLING-DELTA-FTP" ODATE="ODAT" SIGN="ADD"/>
     <AUTOEDIT2 NAME="%%FTP-ACCOUNT" VALUE="ctds_energent"/>
     <AUTOEDIT2 NAME="%%FTP-LOSTYPE" VALUE="Unix"/>
     <AUTOEDIT2 NAME="%%FTP-LUSER" VALUE="ctds"/>
     <AUTOEDIT2 NAME="%%FTP-ROSTYPE" VALUE="Windows"/>
     <AUTOEDIT2 NAME="%%FTP-RUSER" VALUE="controlm@enbridge.com"/>
     <AUTOEDIT2 NAME="%%FTP-LPATH1" VALUE="/prod/appl/ctds/Publish/ENERGENT/*"/>
     <AUTOEDIT2 NAME="%%FTP-RPATH1" VALUE="/\Business Intelligence-Gas Consumption\Energent\Outgoing\Gas Consumption\"/>
     <AUTOEDIT2 NAME="%%FTP-PATH" VALUE="Not in use for application jobs"/>
     <AUTOEDIT2 NAME="%%FTP-USE_DEF_NUMRETRIES" VALUE="1"/>
     <AUTOEDIT2 NAME="%%FTP-RPF" VALUE="1"/>
     <AUTOEDIT2 NAME="%%FTP-CLEAR_ALL" VALUE="1"/>
     <AUTOEDIT2 NAME="%%FTP-CONNTYPE2" VALUE="FTP"/>
     <AUTOEDIT2 NAME="%%FTP-CONNTYPE1" VALUE="LOCAL"/>
     <AUTOEDIT2 NAME="%%FTP-LHOST" VALUE="Local"/>
     <AUTOEDIT2 NAME="%%FTP-RHOST" VALUE="hdq-nt164"/>
     <AUTOEDIT2 NAME="%%FTP-LPASSIVE" VALUE="0"/>
     <AUTOEDIT2 NAME="%%FTP-RPASSIVE" VALUE="0"/>
     <AUTOEDIT2 NAME="%%FTP-UPLOAD1" VALUE="1"/>
     <AUTOEDIT2 NAME="%%FTP-UPLOAD2" VALUE="1"/>
     <AUTOEDIT2 NAME="%%FTP-UPLOAD3" VALUE="1"/>
     <AUTOEDIT2 NAME="%%FTP-UPLOAD4" VALUE="1"/>
     <AUTOEDIT2 NAME="%%FTP-UPLOAD5" VALUE="1"/>
     <AUTOEDIT2 NAME="%%FTP-TRANSFER_NUM" VALUE="1"/>
     <AUTOEDIT2 NAME="%%FTP-TYPE1" VALUE="A"/>
     <AUTOEDIT2 NAME="%%FTP-TYPE2" VALUE="I"/>
     <AUTOEDIT2 NAME="%%FTP-TYPE3" VALUE="I"/>
     <AUTOEDIT2 NAME="%%FTP-TYPE4" VALUE="I"/>
     <AUTOEDIT2 NAME="%%FTP-TYPE5" VALUE="I"/>
     <AUTOEDIT2 NAME="%%FTP-MINSIZE1" VALUE="0"/>
     <AUTOEDIT2 NAME="%%FTP-MINSIZE2" VALUE="0"/>
     <AUTOEDIT2 NAME="%%FTP-MINSIZE3" VALUE="0"/>
     <AUTOEDIT2 NAME="%%FTP-MINSIZE4" VALUE="0"/>
     <AUTOEDIT2 NAME="%%FTP-MINSIZE5" VALUE="0"/>
     <AUTOEDIT2 NAME="%%FTP-TIMELIMIT1" VALUE="0"/>
     <AUTOEDIT2 NAME="%%FTP-TIMELIMIT2" VALUE="0"/>
     <AUTOEDIT2 NAME="%%FTP-TIMELIMIT3" VALUE="0"/>
     <AUTOEDIT2 NAME="%%FTP-TIMELIMIT4" VALUE="0"/>
     <AUTOEDIT2 NAME="%%FTP-TIMELIMIT5" VALUE="0"/>
     <AUTOEDIT2 NAME="%%FTP-UNIQUE1" VALUE="0"/>
     <AUTOEDIT2 NAME="%%FTP-IF_EXIST1" VALUE="0"/>
     <AUTOEDIT2 NAME="%%FTP-IF_EXIST2" VALUE="0"/>
     <AUTOEDIT2 NAME="%%FTP-IF_EXIST3" VALUE="0"/>
     <AUTOEDIT2 NAME="%%FTP-IF_EXIST4" VALUE="0"/>
     <AUTOEDIT2 NAME="%%FTP-IF_EXIST5" VALUE="0"/>
     <AUTOEDIT2 NAME="%%FTP-SRCOPT1" VALUE="1"/>
     <AUTOEDIT2 NAME="%%FTP-SRCOPT2" VALUE="0"/>
     <AUTOEDIT2 NAME="%%FTP-SRCOPT3" VALUE="0"/>
     <AUTOEDIT2 NAME="%%FTP-SRCOPT4" VALUE="0"/>
     <AUTOEDIT2 NAME="%%FTP-SRCOPT5" VALUE="0"/>
     <AUTOEDIT2 NAME="%%FTP-DSTOPT1" VALUE="0"/>
     <AUTOEDIT2 NAME="%%FTP-DSTOPT2" VALUE="0"/>
     <AUTOEDIT2 NAME="%%FTP-DSTOPT3" VALUE="0"/>
     <AUTOEDIT2 NAME="%%FTP-DSTOPT4" VALUE="0"/>
     <AUTOEDIT2 NAME="%%FTP-DSTOPT5" VALUE="0"/>
     <AUTOEDIT2 NAME="%%FTP-ABSTIME1" VALUE="0"/>
     <AUTOEDIT2 NAME="%%FTP-ABSTIME2" VALUE="0"/>
     <AUTOEDIT2 NAME="%%FTP-ABSTIME3" VALUE="0"/>
     <AUTOEDIT2 NAME="%%FTP-ABSTIME4" VALUE="0"/>
     <AUTOEDIT2 NAME="%%FTP-ABSTIME5" VALUE="0"/>
     <AUTOEDIT2 NAME="%%FTP-TRIM1" VALUE="1"/>
     <AUTOEDIT2 NAME="%%FTP-TRIM2" VALUE="1"/>
     <AUTOEDIT2 NAME="%%FTP-TRIM3" VALUE="1"/>
     <AUTOEDIT2 NAME="%%FTP-TRIM4" VALUE="1"/>
     <AUTOEDIT2 NAME="%%FTP-TRIM5" VALUE="1"/>
   </JOB>

範例程式碼(不完整):

#!/bin/sh
# Excerpt of the asker's conversion script (incomplete: the while loop below
# is never closed in this listing).
# Visible behavior: strip blank lines from the exported job XML into a working
# file, then, inside a loop, try to copy one <JOB ...> ... </JOB> block into
# /tmp/result_1 and derive values (JOBNAME, FTP-RHOST) from that block.

# -x traces each command as it runs, -v echoes script lines as they are read.
set -xv
# Remove leftovers from a previous run (working file and result files).
rm /tmp/result* /tmp/ftp_final
# Copy the source XML without empty lines into the working file.
sed '/^$/d' /tmp/FTPJobs_table007.xml >> /tmp/ftp_final
# Number of lines in the working file; used as the loop condition.
lines=`cat /tmp/ftp_final | wc -l`
# NOTE(review): nothing in the visible part of the loop updates $lines or
# removes the processed block from /tmp/ftp_final -- presumably that happens
# in the part of the script that is not shown; confirm.
while [ $lines -gt 1 ]
do   
# Disabled debugging probes (left as in the original).
#typeset -i temp=`sed -n '5,5p' /tmp/jobs_xa_final | grep DESC | wc -l`
#temp2=`sed -n '5,5p' /tmp/jobs_xa_final | grep DESC`
#echo $temp
#echo $temp2
#echo "nodesc"
# Intent: delete every line outside the first <JOB ... </JOB range and quit
# at the closing tag, appending that single block to /tmp/result_1.
# NOTE(review): in GNU sed, \< is a word-start anchor, not a literal '<'.
# So /\<JOB/ matches any word beginning with JOB, and /\<\/JOB/ can never
# match because '/' is not a word character. The range therefore appears to
# run to end of file and the q never fires, which would explain why all later
# blocks are selected too. An unescaped '<' is already literal in sed.
sed '/\<JOB/,/\<\/JOB/!d;/\<\/JOB/q' /tmp/ftp_final >> /tmp/result_1
# Value of the JOBNAME="..." attribute: text after the first '=', then the
# text between the first pair of double quotes.
JOBNAME=`cat /tmp/result_1 | grep "JOBNAME=" | cut -d"=" -f2 | cut -d"\"" -f2`
# Prefix written in front of the job name in the output file.
WORKSTATIONHASH="WA_SAGT#"
echo $WORKSTATIONHASH$JOBNAME >> /tmp/result_final
# Append the fixed header text after the job identifier line.
cat /tmp/header.txt >> /tmp/result_final
# Opening/closing tags and server value for the <jsdlfiletransfer:server>
# element; they are only assigned here, not yet used in the visible excerpt.
FTLSERVEROPENTAG="<jsdlfiletransfer:server>"
# From the line containing FTP-RHOST: third '='-separated field (the VALUE
# part), then the text between its double quotes.
FTRSERVER=`cat /tmp/result_1 | grep "FTP-RHOST" | cut -d "=" -f3 | cut -d "\"" -f2`
FTLSERVERCLOSINGTAG="</jsdlfiletransfer:server>"

預期輸出:

WA_SAGT#MKT_AMER_FTP
TASK
   <?xml version="1.0" encoding="UTF-8"?>
<jsdl:jobDefinition xmlns:jsdl="http://www.ibm.com/xmlns/prod/scheduling/1.0/jsdl" xmlns:jsdlfiletransfer="http://www.ibm.com/xmlns/prod/scheduling/1.0/jsdlfiletransfer" name="filetransfer">
 <jsdl:application name="filetransfer">
   <jsdlfiletransfer:filetransfer>
           <jsdlfiletransfer:downloadInfo>
               <jsdlfiletransfer:server>ws94mdm0</jsdlfiletransfer:server>
               <jsdlfiletransfer:localfile>/tmp/leonjob2.txt</jsdlfiletransfer:localfile>
               <jsdlfiletransfer:remotefile>/tmp/leonjob.txt</jsdlfiletransfer:remotefile>
               <jsdlfiletransfer:localCredentials>
                   <jsdl:userName>wauser</jsdl:userName>
                   <jsdl:password>{aes}Gt+aN5nGz5dUBNaS/TJHoab0qwDZzII4gQPBi8AhPz0=</jsdl:password>
               </jsdlfiletransfer:localCredentials>
               <jsdlfiletransfer:remoteCredentials>
                   <jsdl:userName>wauser</jsdl:userName>
                   <jsdl:password>{aes}G7Rhyk5NkonXiV3AVSQ8pjVXElHgU0tE1RUveivv0xA=</jsdl:password>
               </jsdlfiletransfer:remoteCredentials>
               <jsdlfiletransfer:protocol>SSH</jsdlfiletransfer:protocol>
               <jsdlfiletransfer:transferMode>binary</jsdlfiletransfer:transferMode>
           </jsdlfiletransfer:downloadInfo>
       </jsdlfiletransfer:filetransfer>
 </jsdl:application>
</jsdl:jobDefinition>
DESCRIPTION "Added by composer."
RECOVERY STOP

問候, Sriram.V

使用 sed、grep 或 awk 解析 XML 既複雜又容易出錯。您應該使用專門的 XML 解析器,例如 xmlstarlet。

不確定您的輸出應該是什麼樣子,但從您的腳本來看,我認為它類似於以下內容。這將為每個 JOB 輸出一行「JOBNAME FTP_RHOST_VALUE」:

# For every JOB element in the file, print its JOBNAME attribute, a tab, and
# the VALUE attribute of the child AUTOEDIT2 element whose NAME is
# "%%FTP-RHOST", followed by a newline (one line per job).
# Template options: -m matches an XPath, -v prints the value of an XPath
# relative to the current match, -o prints a literal string, -n prints a
# newline. $'\t' is ANSI-C quoting for a tab and needs bash/ksh/zsh.
xmlstarlet sel -t \
-m '//JOB' \
-v './@JOBNAME' \
-o $'\t' \
-v './AUTOEDIT2[@NAME="%%FTP-RHOST"]/@VALUE' \
-n \
/tmp/FTPJobs_table007.xml

範例文本的輸出:

energent_billing_delta-FTP  hdq-nt164

您可能需要安裝xmlstarlet,例如:

sudo apt install xmlstarlet

引用自:https://unix.stackexchange.com/questions/632609