regexec.c (5048B)
1 #include <stdlib.h> 2 #include "plan9.h" 3 #include "regexp9.h" 4 #include "regcomp.h" 5 6 7 /* 8 * return 0 if no match 9 * >0 if a match 10 * <0 if we ran out of _relist space 11 */ 12 static int 13 regexec1(Reprog *progp, /* program to run */ 14 char *bol, /* string to run machine on */ 15 Resub *mp, /* subexpression elements */ 16 int ms, /* number of elements at mp */ 17 Reljunk *j 18 ) 19 { 20 int flag=0; 21 Reinst *inst; 22 Relist *tlp; 23 char *s; 24 int i, checkstart; 25 Rune r, *rp, *ep; 26 int n; 27 Relist* tl; /* This list, next list */ 28 Relist* nl; 29 Relist* tle; /* ends of this and next list */ 30 Relist* nle; 31 int match; 32 char *p; 33 34 match = 0; 35 checkstart = j->starttype; 36 if(mp) 37 for(i=0; i<ms; i++) { 38 mp[i].s.sp = 0; 39 mp[i].e.ep = 0; 40 } 41 j->relist[0][0].inst = 0; 42 j->relist[1][0].inst = 0; 43 44 /* Execute machine once for each character, including terminal NUL */ 45 s = j->starts; 46 do{ 47 /* fast check for first char */ 48 if(checkstart) { 49 switch(j->starttype) { 50 case RUNE: 51 p = utfrune(s, j->startchar); 52 if(p == 0 || s == j->eol) 53 return match; 54 s = p; 55 break; 56 case BOL: 57 if(s == bol) 58 break; 59 p = utfrune(s, '\n'); 60 if(p == 0 || s == j->eol) 61 return match; 62 s = p; 63 break; 64 } 65 } 66 r = *(uchar*)s; 67 if(r < Runeself) 68 n = 1; 69 else 70 n = chartorune(&r, s); 71 72 /* switch run lists */ 73 tl = j->relist[flag]; 74 tle = j->reliste[flag]; 75 nl = j->relist[flag^=1]; 76 nle = j->reliste[flag]; 77 nl->inst = 0; 78 79 /* Add first instruction to current list */ 80 if(match == 0) 81 _renewemptythread(tl, progp->startinst, ms, s); 82 83 /* Execute machine until current list is empty */ 84 for(tlp=tl; tlp->inst; tlp++){ /* assignment = */ 85 for(inst = tlp->inst; ; inst = inst->u2.next){ 86 switch(inst->type){ 87 case RUNE: /* regular character */ 88 if(inst->u1.r == r){ 89 if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) 90 return -1; 91 } 92 break; 93 case LBRA: 94 tlp->se.m[inst->u1.subid].s.sp = s; 95 continue; 96 case RBRA: 97 tlp->se.m[inst->u1.subid].e.ep = s; 98 continue; 99 case ANY: 100 if(r != '\n') 101 if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) 102 return -1; 103 break; 104 case ANYNL: 105 if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) 106 return -1; 107 break; 108 case BOL: 109 if(s == bol || *(s-1) == '\n') 110 continue; 111 break; 112 case EOL: 113 if(s == j->eol || r == 0 || r == '\n') 114 continue; 115 break; 116 case CCLASS: 117 ep = inst->u1.cp->end; 118 for(rp = inst->u1.cp->spans; rp < ep; rp += 2) 119 if(r >= rp[0] && r <= rp[1]){ 120 if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) 121 return -1; 122 break; 123 } 124 break; 125 case NCCLASS: 126 ep = inst->u1.cp->end; 127 for(rp = inst->u1.cp->spans; rp < ep; rp += 2) 128 if(r >= rp[0] && r <= rp[1]) 129 break; 130 if(rp == ep) 131 if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) 132 return -1; 133 break; 134 case OR: 135 /* evaluate right choice later */ 136 if(_renewthread(tlp, inst->u1.right, ms, &tlp->se) == tle) 137 return -1; 138 /* efficiency: advance and re-evaluate */ 139 continue; 140 case END: /* Match! */ 141 match = 1; 142 tlp->se.m[0].e.ep = s; 143 if(mp != 0) 144 _renewmatch(mp, ms, &tlp->se); 145 break; 146 } 147 break; 148 } 149 } 150 if(s == j->eol) 151 break; 152 checkstart = j->starttype && nl->inst==0; 153 s += n; 154 }while(r); 155 return match; 156 } 157 158 static int 159 regexec2(Reprog *progp, /* program to run */ 160 char *bol, /* string to run machine on */ 161 Resub *mp, /* subexpression elements */ 162 int ms, /* number of elements at mp */ 163 Reljunk *j 164 ) 165 { 166 int rv; 167 Relist *relist0, *relist1; 168 169 /* mark space */ 170 relist0 = malloc(BIGLISTSIZE*sizeof(Relist)); 171 if(relist0 == nil) 172 return -1; 173 relist1 = malloc(BIGLISTSIZE*sizeof(Relist)); 174 if(relist1 == nil){ 175 free(relist1); 176 return -1; 177 } 178 j->relist[0] = relist0; 179 j->relist[1] = relist1; 180 j->reliste[0] = relist0 + BIGLISTSIZE - 2; 181 j->reliste[1] = relist1 + BIGLISTSIZE - 2; 182 183 rv = regexec1(progp, bol, mp, ms, j); 184 free(relist0); 185 free(relist1); 186 return rv; 187 } 188 189 extern int 190 regexec(Reprog *progp, /* program to run */ 191 char *bol, /* string to run machine on */ 192 Resub *mp, /* subexpression elements */ 193 int ms) /* number of elements at mp */ 194 { 195 Reljunk j; 196 Relist relist0[LISTSIZE], relist1[LISTSIZE]; 197 int rv; 198 199 /* 200 * use user-specified starting/ending location if specified 201 */ 202 j.starts = bol; 203 j.eol = 0; 204 if(mp && ms>0){ 205 if(mp->s.sp) 206 j.starts = mp->s.sp; 207 if(mp->e.ep) 208 j.eol = mp->e.ep; 209 } 210 j.starttype = 0; 211 j.startchar = 0; 212 if(progp->startinst->type == RUNE && progp->startinst->u1.r < Runeself) { 213 j.starttype = RUNE; 214 j.startchar = progp->startinst->u1.r; 215 } 216 if(progp->startinst->type == BOL) 217 j.starttype = BOL; 218 219 /* mark space */ 220 j.relist[0] = relist0; 221 j.relist[1] = relist1; 222 j.reliste[0] = relist0 + nelem(relist0) - 2; 223 j.reliste[1] = relist1 + nelem(relist1) - 2; 224 225 rv = regexec1(progp, bol, mp, ms, &j); 226 if(rv >= 0) 227 return rv; 228 rv = regexec2(progp, bol, mp, ms, &j); 229 if(rv >= 0) 230 return rv; 231 return -1; 232 }