regex_mod.c

Go to the documentation of this file.
00001 /*
00002  * $Id$
00003  *
00004  * regex module - pcre operations
00005  *
00006  * Copyright (C) 2008 Iñaki Baz Castillo
00007  *
00008  * This file is part of Kamailio, a free SIP server.
00009  *
00010  * Kamailio is free software; you can redistribute it and/or modify
00011  * it under the terms of the GNU General Public License as published by
00012  * the Free Software Foundation; either version 2 of the License, or
00013  * (at your option) any later version
00014  *
00015  * Kamailio is distributed in the hope that it will be useful,
00016  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00017  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00018  * GNU General Public License for more details.
00019  *
00020  * You should have received a copy of the GNU General Public License 
00021  * along with this program; if not, write to the Free Software 
00022  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00023  *
00024  * History:
00025  * --------
00026  *  2011-02-22  pcre_match_group() allows now pseudo-variable as group argument.
00027  *  2009-01-14  initial version (Iñaki Baz Castillo).
00028  */
00029 
00030 
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <pcre.h>
#include "../../sr_module.h"
#include "../../dprint.h"
#include "../../pt.h"
#include "../../mem/shm_mem.h"
#include "../../str.h"
#include "../../locking.h"
#include "../../mod_fix.h"
#include "../../lib/kmi/mi.h"
00051 
00052 MODULE_VERSION
00053 
/* Actions accepted by load_pcres(): first load from mod_init() vs. a reload
 * requested through mi_pcres_reload(). */
#define START  0
#define RELOAD 1

/* Size of the buffer load_pcres() uses to read one line of the groups file */
#define FILE_MAX_LINE  500
/* Default value of the 'max_groups' module parameter */
#define MAX_GROUPS     20
/* Default value of the 'group_max_size' module parameter */
#define GROUP_MAX_SIZE 8192
00062 /*
00063  * Locking variables
00064  */
00065 gen_lock_t *reload_lock;
00066 
00067 
00068 /*
00069  * Module exported parameter variables
00070  */
00071 static char *file;
00072 static int max_groups            = MAX_GROUPS;
00073 static int group_max_size        = GROUP_MAX_SIZE;
00074 static int pcre_caseless         = 0;
00075 static int pcre_multiline        = 0;
00076 static int pcre_dotall           = 0;
00077 static int pcre_extended         = 0;
00078 
00079 
00080 /*
00081  * Module internal parameter variables
00082  */
00083 static pcre **pcres;
00084 static pcre ***pcres_addr;
00085 static int *num_pcres;
00086 static int pcre_options = 0x00000000;
00087 
00088 
00089 /*
00090  * Module core functions
00091  */
00092 static int mod_init(void);
00093 static void destroy(void);
00094 
00095 
00096 /*
00097  * Module internal functions
00098  */
00099 static int load_pcres(int);
00100 static void free_shared_memory(void);
00101 
00102 
00103 /*
00104  * Script functions
00105  */
00106 static int w_pcre_match(struct sip_msg* _msg, char* _s1, char* _s2);
00107 static int w_pcre_match_group(struct sip_msg* _msg, char* _s1, char* _s2);
00108 
00109 
00110 /*
00111  * MI functions
00112  */
00113 static struct mi_root* mi_pcres_reload(struct mi_root* cmd, void* param);
00114 
00115 
00116 /*
00117  * Exported functions
00118  */
00119 static cmd_export_t cmds[] =
00120 {
00121         { "pcre_match", (cmd_function)w_pcre_match, 2, fixup_spve_spve, 0,
00122                 REQUEST_ROUTE|FAILURE_ROUTE|ONREPLY_ROUTE|BRANCH_ROUTE|LOCAL_ROUTE },
00123         { "pcre_match_group", (cmd_function)w_pcre_match_group, 2, fixup_spve_spve, 0,
00124                 REQUEST_ROUTE|FAILURE_ROUTE|ONREPLY_ROUTE|BRANCH_ROUTE|LOCAL_ROUTE },
00125         { "pcre_match_group", (cmd_function)w_pcre_match_group, 1, fixup_spve_null, 0,
00126                 REQUEST_ROUTE|FAILURE_ROUTE|ONREPLY_ROUTE|BRANCH_ROUTE|LOCAL_ROUTE },
00127         { 0, 0, 0, 0, 0, 0 }
00128 };
00129 
00130 
00131 /*
00132  * Exported parameters
00133  */
00134 static param_export_t params[] = {
00135         {"file",                STR_PARAM,  &file                },
00136         {"max_groups",          INT_PARAM,  &max_groups          },
00137         {"group_max_size",      INT_PARAM,  &group_max_size      },
00138         {"pcre_caseless",       INT_PARAM,  &pcre_caseless       },
00139         {"pcre_multiline",      INT_PARAM,  &pcre_multiline      },
00140         {"pcre_dotall",         INT_PARAM,  &pcre_dotall         },
00141         {"pcre_extended",       INT_PARAM,  &pcre_extended       },
00142         {0, 0, 0}
00143 };
00144 
00145 
00146 /*
00147  * Exported MI functions
00148  */
00149 static mi_export_t mi_cmds[] = {
00150         { "regex_reload", mi_pcres_reload, MI_NO_INPUT_FLAG, 0, 0 },
00151         { 0, 0, 0, 0 ,0 }
00152 };
00153 
00154 
00155 /*
00156  * Module interface
00157  */
00158 struct module_exports exports = {
00159         "regex",                   
00160         DEFAULT_DLFLAGS,           
00161         cmds,                      
00162         params,                    
00163         0,                         
00164         mi_cmds,                   
00165         0,                         
00166         0,                         
00167         mod_init,                  
00168         (response_function) 0,     
00169         destroy,                   
00170         0                          
00171 };
00172 
00173 
00174 
00178 static int mod_init(void)
00179 {
00180         if(register_mi_mod(exports.name, mi_cmds)!=0)
00181         {
00182                 LM_ERR("failed to register MI commands\n");
00183                 return -1;
00184         }
00185 
00186         /* Group matching feature */
00187         if (file == NULL) {
00188                 LM_NOTICE("'file' parameter is not set, group matching disabled\n");
00189         } else {
00190                 /* Create and init the lock */
00191                 reload_lock = lock_alloc();
00192                 if (reload_lock == NULL) {
00193                         LM_ERR("cannot allocate reload_lock\n");
00194                         goto err;
00195                 }
00196                 if (lock_init(reload_lock) == NULL) {
00197                         LM_ERR("cannot init the reload_lock\n");
00198                         lock_dealloc(reload_lock);
00199                         goto err;
00200                 }
00201                 
00202                 /* PCRE options */
00203                 if (pcre_caseless != 0) {
00204                         LM_DBG("PCRE CASELESS enabled\n");
00205                         pcre_options = pcre_options | PCRE_CASELESS;
00206                 }
00207                 if (pcre_multiline != 0) {
00208                         LM_DBG("PCRE MULTILINE enabled\n");
00209                         pcre_options = pcre_options | PCRE_MULTILINE;
00210                 }
00211                 if (pcre_dotall != 0) {
00212                         LM_DBG("PCRE DOTALL enabled\n");
00213                         pcre_options = pcre_options | PCRE_DOTALL;
00214                 }
00215                 if (pcre_extended != 0) {
00216                         LM_DBG("PCRE EXTENDED enabled\n");
00217                         pcre_options = pcre_options | PCRE_EXTENDED;
00218                 }
00219                 LM_DBG("PCRE options: %i\n", pcre_options);
00220                 
00221                 /* Pointer to pcres */
00222                 if ((pcres_addr = shm_malloc(sizeof(pcre **))) == 0) {
00223                         LM_ERR("no memory for pcres_addr\n");
00224                         goto err;
00225                 }
00226                 
00227                 /* Integer containing the number of pcres */
00228                 if ((num_pcres = shm_malloc(sizeof(int))) == 0) {
00229                         LM_ERR("no memory for num_pcres\n");
00230                         goto err;
00231                 }
00232                 
00233                 /* Load the pcres */
00234                 LM_DBG("loading pcres...\n");
00235                 if (load_pcres(START)) {
00236                         LM_ERR("failed to load pcres\n");
00237                         goto err;
00238                 }
00239         }
00240         
00241         return 0;
00242         
00243 err:
00244         free_shared_memory();
00245         return -1;
00246 }
00247 
00248 
/* Module shutdown function: hands everything back via free_shared_memory(). */
static void destroy(void)
{
	free_shared_memory();
}
00253 
00254 
00256 static int load_pcres(int action)
00257 {
00258         int i, j;
00259         FILE *f;
00260         char line[FILE_MAX_LINE];
00261         char **patterns = NULL;
00262         pcre *pcre_tmp = NULL;
00263         int pcre_size;
00264         int pcre_rc;
00265         const char *pcre_error;
00266         int pcre_erroffset;
00267         int num_pcres_tmp = 0;
00268         pcre **pcres_tmp = NULL;
00269         
00270         /* Get the lock */
00271         lock_get(reload_lock);
00272         
00273         if (!(f = fopen(file, "r"))) {
00274                 LM_ERR("could not open file '%s'\n", file);
00275                 goto err;
00276         }
00277         
00278         /* Array containing each pattern in the file */
00279         if ((patterns = pkg_malloc(sizeof(char*) * max_groups)) == 0) {
00280                 LM_ERR("no more memory for patterns\n");
00281                 fclose(f);
00282                 goto err;
00283         }
00284         for (i=0; i<max_groups; i++) {
00285                 patterns[i] = NULL;
00286         }
00287         for (i=0; i<max_groups; i++) {
00288                 if ((patterns[i] = pkg_malloc(sizeof(char) * group_max_size)) == 0) {
00289                         LM_ERR("no more memory for patterns[%d]\n", i);
00290                         fclose(f);
00291                         goto err;
00292                 }
00293                 memset(patterns[i], '\0', group_max_size);
00294         }
00295         
00296         /* Read the file and extract the patterns */
00297         memset(line, '\0', FILE_MAX_LINE);
00298         i = -1;
00299         while (fgets(line, FILE_MAX_LINE, f) != NULL) {
00300                 
00301                 /* Ignore comments and lines starting by space, tab, CR, LF */
00302                 if(isspace(line[0]) || line[0]=='#') {
00303                         memset(line, '\0', FILE_MAX_LINE);
00304                         continue;
00305                 }
00306                 
00307                 /* First group */
00308                 if (i == -1 && line[0] != '[') {
00309                         LM_ERR("first group must be initialized with [0] before any regular expression\n");
00310                         fclose(f);
00311                         goto err;
00312                 }
00313                 
00314                 /* New group */
00315                 if (line[0] == '[') {
00316                         i++;
00317                         /* Check if there are more patterns than the max value */
00318                         if (i >= max_groups) {
00319                                 LM_ERR("max patterns exceeded\n");
00320                                 fclose(f);
00321                                 goto err;
00322                         }
00323                         /* Start the regular expression with '(' */
00324                         patterns[i][0] = '(';
00325                         memset(line, '\0', FILE_MAX_LINE);
00326                         continue;
00327                 }
00328                 
00329                 /* Check if the patter size is too big (aprox) */
00330                 if (strlen(patterns[i]) + strlen(line) >= group_max_size - 2) {
00331                         LM_ERR("pattern max file exceeded\n");
00332                         fclose(f);
00333                         goto err;
00334                 }
00335                 
00336                 /* Append ')' at the end of the line */
00337                 if (line[strlen(line) - 1] == '\n') {
00338                         line[strlen(line)] = line[strlen(line) - 1];
00339                         line[strlen(line) - 2] = ')';
00340                 } else {
00341                         /* This is the last char in the file and it's not \n */
00342                         line[strlen(line)] = ')';
00343                 }
00344                 
00345                 /* Append '(' at the beginning of the line */
00346                 memcpy(patterns[i]+strlen(patterns[i]), "(", 1);
00347                 
00348                 /* Append the line to the current pattern */
00349                 memcpy(patterns[i]+strlen(patterns[i]), line, strlen(line));
00350                 
00351                 memset(line, '\0', FILE_MAX_LINE);
00352         }
00353         num_pcres_tmp = i + 1;
00354         
00355         fclose(f);
00356         
00357         /* Fix the patterns */
00358         for (i=0; i < num_pcres_tmp; i++) {
00359                 
00360                 /* Convert empty groups in unmatcheable regular expression ^$ */
00361                 if (strlen(patterns[i]) == 1) {
00362                         patterns[i][0] = '^';
00363                         patterns[i][1] = '$';
00364                         patterns[i][2] = '\0';
00365                         continue;
00366                 }
00367                 
00368                 /* Delete possible '\n' at the end of the pattern */
00369                 if (patterns[i][strlen(patterns[i])-1] == '\n') {
00370                         patterns[i][strlen(patterns[i])-1] = '\0';
00371                 }
00372                 
00373                 /* Replace '\n' with '|' (except at the end of the pattern) */
00374                 for (j=0; j < strlen(patterns[i]); j++) {
00375                         if (patterns[i][j] == '\n' && j != strlen(patterns[i])-1) {
00376                                 patterns[i][j] = '|';
00377                         }
00378                 }
00379                 
00380                 /* Add ')' at the end of the pattern */
00381                 patterns[i][strlen(patterns[i])] = ')';
00382         }
00383         
00384         /* Log the group patterns */
00385         LM_INFO("num groups = %d\n", num_pcres_tmp);
00386         for (i=0; i < num_pcres_tmp; i++) {
00387                 LM_INFO("<group[%d]>%s</group[%d]> (size = %i)\n", i, patterns[i], i, (int)strlen(patterns[i]));
00388         }
00389         
00390         /* Temporal pointer of pcres */
00391         if ((pcres_tmp = pkg_malloc(sizeof(pcre *) * num_pcres_tmp)) == 0) {
00392                 LM_ERR("no more memory for pcres_tmp\n");
00393                 goto err;
00394         }
00395         for (i=0; i<num_pcres_tmp; i++) {
00396                 pcres_tmp[i] = NULL;
00397         }
00398         
00399         /* Compile the patters */
00400         for (i=0; i<num_pcres_tmp; i++) {
00401         
00402                 pcre_tmp = pcre_compile(patterns[i], pcre_options, &pcre_error, &pcre_erroffset, NULL);
00403                 if (pcre_tmp == NULL) {
00404                         LM_ERR("pcre_tmp compilation of '%s' failed at offset %d: %s\n", patterns[i], pcre_erroffset, pcre_error);
00405                         goto err;
00406                 }
00407                 pcre_rc = pcre_fullinfo(pcre_tmp, NULL, PCRE_INFO_SIZE, &pcre_size);
00408                 if (pcre_rc) {
00409                         printf("pcre_fullinfo on compiled pattern[%i] yielded error: %d\n", i, pcre_rc);
00410                         goto err;
00411                 }
00412                 
00413                 if ((pcres_tmp[i] = pkg_malloc(pcre_size)) == 0) {
00414                         LM_ERR("no more memory for pcres_tmp[%i]\n", i);
00415                         goto err;
00416                 }
00417                 
00418                 memcpy(pcres_tmp[i], pcre_tmp, pcre_size);
00419                 pcre_free(pcre_tmp);
00420                 pkg_free(patterns[i]);
00421         }
00422         
00423         /* Copy to shared memory */
00424         if (action == RELOAD) {
00425                 for(i=0; i<*num_pcres; i++) {  /* Use the previous num_pcres value */
00426                         if (pcres[i]) {
00427                                 shm_free(pcres[i]);
00428                         }
00429                 }
00430                 shm_free(pcres);
00431         }
00432         if ((pcres = shm_malloc(sizeof(pcre *) * num_pcres_tmp)) == 0) {
00433                 LM_ERR("no more memory for pcres\n");
00434                 goto err;
00435         }
00436         for (i=0; i<num_pcres_tmp; i++) {
00437                 pcres[i] = NULL;
00438         }
00439         for (i=0; i<num_pcres_tmp; i++) {
00440                 pcre_rc = pcre_fullinfo(pcres_tmp[i], NULL, PCRE_INFO_SIZE, &pcre_size);
00441                 if ((pcres[i] = shm_malloc(pcre_size)) == 0) {
00442                         LM_ERR("no more memory for pcres[%i]\n", i);
00443                         goto err;
00444                 }
00445                 memcpy(pcres[i], pcres_tmp[i], pcre_size);
00446         }
00447         *num_pcres = num_pcres_tmp;
00448         *pcres_addr = pcres;
00449 
00450         /* Free used memory */
00451         for (i=0; i<num_pcres_tmp; i++) {
00452                 pkg_free(pcres_tmp[i]);
00453         }
00454         pkg_free(pcres_tmp);
00455         pkg_free(patterns);
00456         lock_release(reload_lock);
00457         
00458         return 0;
00459         
00460         
00461 err:
00462         if (patterns) {
00463                 for(i=0; i<max_groups; i++) {
00464                         if (patterns[i]) {
00465                                 pkg_free(patterns[i]);
00466                         }
00467                 }
00468                 pkg_free(patterns);
00469         }
00470         if (pcres_tmp) {
00471                 for (i=0; i<num_pcres_tmp; i++) {
00472                         if (pcres_tmp[i]) {
00473                                 pkg_free(pcres_tmp[i]);
00474                         }
00475                 }
00476                 pkg_free(pcres_tmp);
00477         }
00478         if (reload_lock) {
00479                 lock_release(reload_lock);
00480         }
00481         if (action == START) {
00482                 free_shared_memory();
00483         }
00484         return -1;
00485 }
00486 
00487 
00488 static void free_shared_memory(void)
00489 {
00490         int i;
00491         
00492         if (pcres) {
00493                 for(i=0; i<*num_pcres; i++) {
00494                         if (pcres[i]) {
00495                                 shm_free(pcres[i]);
00496                         }
00497                 }
00498                 shm_free(pcres);
00499                 pcres = NULL;
00500         }
00501         
00502         if (num_pcres) {
00503                 shm_free(num_pcres);
00504                 num_pcres = NULL;
00505         }
00506         
00507         if (pcres_addr) {
00508                 shm_free(pcres_addr);
00509                 pcres_addr = NULL;
00510         }
00511         
00512         if (reload_lock) {
00513                 lock_destroy(reload_lock);
00514                 lock_dealloc(reload_lock);
00515                 reload_lock = NULL;
00516     }
00517 }
00518 
00519 
00520 /*
00521  * Script functions
00522  */
00523 
00525 static int w_pcre_match(struct sip_msg* _msg, char* _s1, char* _s2)
00526 {
00527         str string;
00528         str regex;
00529         pcre *pcre_re = NULL;
00530         int pcre_rc;
00531         const char *pcre_error;
00532         int pcre_erroffset;
00533         
00534         if (_s1 == NULL) {
00535                 LM_ERR("bad parameters\n");
00536                 return -2;
00537         }
00538         
00539         if (_s2 == NULL) {
00540                 LM_ERR("bad parameters\n");
00541                 return -2;
00542         }
00543         
00544         if (fixup_get_svalue(_msg, (gparam_p)_s1, &string))
00545         {
00546                 LM_ERR("cannot print the format for string\n");
00547                 return -3;
00548         }
00549         if (fixup_get_svalue(_msg, (gparam_p)_s2, &regex))
00550         {
00551                 LM_ERR("cannot print the format for regex\n");
00552                 return -3;
00553         }
00554         
00555         pcre_re = pcre_compile(regex.s, pcre_options, &pcre_error, &pcre_erroffset, NULL);
00556         if (pcre_re == NULL) {
00557                 LM_ERR("pcre_re compilation of '%s' failed at offset %d: %s\n", regex.s, pcre_erroffset, pcre_error);
00558                 return -4;
00559         }
00560         
00561         pcre_rc = pcre_exec(
00562                 pcre_re,                    /* the compiled pattern */
00563                 NULL,                       /* no extra data - we didn't study the pattern */
00564                 string.s,                   /* the matching string */
00565                 (int)(string.len),          /* the length of the subject */
00566                 0,                          /* start at offset 0 in the string */
00567                 0,                          /* default options */
00568                 NULL,                       /* output vector for substring information */
00569                 0);                         /* number of elements in the output vector */
00570         
00571         /* Matching failed: handle error cases */
00572         if (pcre_rc < 0) {
00573                 switch(pcre_rc) {
00574                         case PCRE_ERROR_NOMATCH:
00575                                 LM_DBG("'%s' doesn't match '%s'\n", string.s, regex.s);
00576                                 break;
00577                         default:
00578                                 LM_DBG("matching error '%d'\n", pcre_rc);
00579                                 break;
00580                 }
00581                 pcre_free(pcre_re);
00582                 return -1;
00583         }
00584         pcre_free(pcre_re);
00585         LM_DBG("'%s' matches '%s'\n", string.s, regex.s);
00586         return 1;
00587 }
00588 
00589 
00591 static int w_pcre_match_group(struct sip_msg* _msg, char* _s1, char* _s2)
00592 {
00593         str string, group;
00594         unsigned int num_pcre;
00595         int pcre_rc;
00596         
00597         /* Check if group matching feature is enabled */
00598         if (file == NULL) {
00599                 LM_ERR("group matching is disabled\n");
00600                 return -2;
00601         }
00602         
00603         if (_s1 == NULL) {
00604                 LM_ERR("bad parameters\n");
00605                 return -3;
00606         }
00607         
00608         if (_s2 == NULL) {
00609                 num_pcre = 0;
00610         } else {
00611                 if (fixup_get_svalue(_msg, (gparam_p)_s2, &group))
00612                 {
00613                         LM_ERR("cannot print the format for second param\n");
00614                         return -5;
00615                 }
00616                 str2int(&group, &num_pcre);
00617         }
00618         
00619         if (num_pcre >= *num_pcres) {
00620                 LM_ERR("invalid pcre index '%i', there are %i pcres\n", num_pcre, *num_pcres);
00621                 return -4;
00622         }
00623         
00624         if (fixup_get_svalue(_msg, (gparam_p)_s1, &string))
00625         {
00626                 LM_ERR("cannot print the format for first param\n");
00627                 return -5;
00628         }
00629         
00630         lock_get(reload_lock);
00631         
00632         pcre_rc = pcre_exec(
00633                 (*pcres_addr)[num_pcre],    /* the compiled pattern */
00634                 NULL,                       /* no extra data - we didn't study the pattern */
00635                 string.s,                   /* the matching string */
00636                 (int)(string.len),          /* the length of the subject */
00637                 0,                          /* start at offset 0 in the string */
00638                 0,                          /* default options */
00639                 NULL,                       /* output vector for substring information */
00640                 0);                         /* number of elements in the output vector */
00641         
00642         lock_release(reload_lock);
00643         
00644         /* Matching failed: handle error cases */
00645         if (pcre_rc < 0) {
00646                 switch(pcre_rc) {
00647                         case PCRE_ERROR_NOMATCH:
00648                                 LM_DBG("'%s' doesn't match pcres[%i]\n", string.s, num_pcre);
00649                                 break;
00650                         default:
00651                                 LM_DBG("matching error '%d'\n", pcre_rc);
00652                                 break;
00653                 }
00654                 return -1;
00655         }
00656         else {
00657                 LM_DBG("'%s' matches pcres[%i]\n", string.s, num_pcre);
00658                 return 1;
00659         }
00660         
00661 }
00662 
00663 
00664 /*
00665  * MI functions
00666  */
00667 
00669 static struct mi_root* mi_pcres_reload(struct mi_root* cmd, void* param)
00670 {
00671         /* Check if group matching feature is enabled */
00672         if (file == NULL) {
00673                 LM_NOTICE("'file' parameter is not set, group matching disabled\n");
00674                 return init_mi_tree(403, MI_SSTR("Group matching not enabled"));
00675         }
00676         
00677         LM_INFO("reloading pcres...\n");
00678         if (load_pcres(RELOAD)) {
00679                 LM_ERR("failed to reload pcres\n");
00680                 return init_mi_tree(500, MI_INTERNAL_ERR_S, MI_INTERNAL_ERR_LEN);
00681         }
00682         LM_INFO("reload success\n");
00683         return init_mi_tree(200, MI_OK_S, MI_OK_LEN);
00684 }