Regular-Expression

regmatch_t 數組中未使用的結構元素不是 -1

  • November 16, 2019

regcomp 和 regexec的手冊頁指出,“任何未使用的結構元素都將包含值 -1。”

然而,在我最後一場比賽之後檢查值的邏輯中rm_so,情況似乎並非如此。它似乎等於0:

#include <stdlib.h>                                              
#include <stdio.h>                                               
#include <string.h>                                              
#include <sys/types.h>                                           
#include <regex.h>                                                                                         

#define MATCH_CNT   4                                            


void print_matches(regmatch_t matches[])                         
{                                                                
 printf("Regexec successful.\n");                               
 int i = 0;                                                     
 for(; i < MATCH_CNT; i++)                                      
 {                            
   //########### HERE'S THE RUB ####################                                  
   if(matches[i].rm_so != -1)                                    
   {                                                            
     printf("Match %i; Beginning:\t%i\n", i, matches[i].rm_so); 
     printf("Match %i; End:\t\t%i\n", i, matches[i].rm_eo);     
   }                                                            
 }                                                              
}                                                                

int main(int argc, char *argv[])                                 
{                                                                
 if(argc != 3)                                                  
   fprintf(stderr, "retester <pattern> <string>\n");            

 char *pat = argv[1];                                           
 char *str = argv[2];                                           

 regex_t compreg;                                               
 memset(&compreg, 0, sizeof(regex_t));                          

 regmatch_t matches[MATCH_CNT];                                 
 memset(&matches, 0, MATCH_CNT*sizeof(regmatch_t));           
 int matchcnt = -1;                                             

 char *errbuff;                                                 

printf("Trying to match with extended regex...\n");                  
int compret = -2;                                                        
//set REG_EXTENDED flag                                              
if((compret = regcomp(&compreg, pat, REG_EXTENDED)) == 0)            
{                                                                    
  printf("Compiling successful.\n");                                 
  int execret = -2;                                                  
  if((execret = regexec(&compreg, str, matchcnt, matches, 0)) == 0)  
  {                                                                  
    print_matches(matches);                                          
  }                                                                  
  else if(execret != 0)                                              
  {                                                                  
    printf("Regexec failed.\n");                                     
    size_t errbuffsz = regerror(execret, &compreg, 0, 0);            
    errbuff = malloc(errbuffsz);                                     
    memset(errbuff, '\0', errbuffsz);                                
    regerror(execret, &compreg, errbuff, errbuffsz);                 
    fprintf(stderr, "Regexec error: %s\n", errbuff);                 
  }                                                                  
}                                                                    
else                                                                 
{                                                                    
  printf("Compiling failed.\n");                                     
  size_t errbuffsz = regerror(compret, &compreg, 0, 0);              
  errbuff = malloc(errbuffsz);                                       
  memset(errbuff, '\0', errbuffsz);                                  
  regerror(compret, &compreg, errbuff, errbuffsz);                   
  fprintf(stderr, "Regexec error: %s", errbuff);                     
}                                                                    
free(errbuff);                                                       
errbuff = NULL;                                                      
regfree(&compreg);                                                   
return 0;                                                            
}

這是上面的輸出。我對模式的理解是它只有一個匹配組。根據文件,這意味著我應該看到兩個填充regmatch_t元素:第一個包含整個字元串,第二個包含匹配的組。

[chb]$ gcc -g -o foo foo.c                                                
[chb]$ ./foo  "^ROOM\s{1}NAME:\s{1}([[:alpha:]]{6})" "ROOM NAME: BriCol"
Trying to match with extended regex...                                                      
Compiling successful.                                                                       
Regexec successful.                                                                         
Match 0; Beginning:     0                                                                   
Match 0; End:           17                                                                  
Match 1; Beginning:     11                                                                  
Match 1; End:           17                                                                  
Match 2; Beginning:     0                                                                   
Match 2; End:           0                                                                   
Match 3; Beginning:     0                                                                   
Match 3; End:           0                                                                                                                                                                                                           

我什至註釋掉了memset將數組清零的內容,認為regexec命令內部的邏輯在某種程度上受到了它的影響。

如果你想regexec用 -1 填充未使用的數組元素,你需要首先告訴它有多少。在此行中替換matchcnt為:MATCH_CNT

if((execret = regexec(&compreg, str, matchcnt, matches, 0)) == 0)

所以它變成了

if((execret = regexec(&compreg, str, MATCH_CNT, matches, 0)) == 0)

int(並在之前添加缺少的compret = -2;內容)並且您的程序將按預期工作:

$ ./549805 "^ROOM\s{1}NAME:\s{1}([[:alpha:]]{6})" "ROOM NAME: BriCol"
Trying to match with extended regex...
Compiling successful.
Regexec successful.
Match 0; Beginning: 0
Match 0; End:       17
Match 1; Beginning: 11
Match 1; End:       17
$

您還應該初始化errbuffNULL,以避免free()使用未定義的值呼叫(何時errbuff從未初始化)。當你在做的時候,檢查malloc()的返回值——分配可能會失敗。

引用自:https://unix.stackexchange.com/questions/549805