wmii

git clone git://oldgit.suckless.org/wmii/
Log | Files | Refs | README | LICENSE

regexec.c (5048B)


      1 #include <stdlib.h>
      2 #include "plan9.h"
      3 #include "regexp9.h"
      4 #include "regcomp.h"
      5 
      6 
      7 /*
      8  *  return	0 if no match
      9  *		>0 if a match
     10  *		<0 if we ran out of _relist space
     11  */
     12 static int
     13 regexec1(Reprog *progp,	/* program to run */
     14 	char *bol,	/* string to run machine on */
     15 	Resub *mp,	/* subexpression elements */
     16 	int ms,		/* number of elements at mp */
     17 	Reljunk *j
     18 )
     19 {
     20 	int flag=0;
     21 	Reinst *inst;
     22 	Relist *tlp;
     23 	char *s;
     24 	int i, checkstart;
     25 	Rune r, *rp, *ep;
     26 	int n;
     27 	Relist* tl;		/* This list, next list */
     28 	Relist* nl;
     29 	Relist* tle;		/* ends of this and next list */
     30 	Relist* nle;
     31 	int match;
     32 	char *p;
     33 
     34 	match = 0;
     35 	checkstart = j->starttype;
     36 	if(mp)
     37 		for(i=0; i<ms; i++) {
     38 			mp[i].s.sp = 0;
     39 			mp[i].e.ep = 0;
     40 		}
     41 	j->relist[0][0].inst = 0;
     42 	j->relist[1][0].inst = 0;
     43 
     44 	/* Execute machine once for each character, including terminal NUL */
     45 	s = j->starts;
     46 	do{
     47 		/* fast check for first char */
     48 		if(checkstart) {
     49 			switch(j->starttype) {
     50 			case RUNE:
     51 				p = utfrune(s, j->startchar);
     52 				if(p == 0 || s == j->eol)
     53 					return match;
     54 				s = p;
     55 				break;
     56 			case BOL:
     57 				if(s == bol)
     58 					break;
     59 				p = utfrune(s, '\n');
     60 				if(p == 0 || s == j->eol)
     61 					return match;
     62 				s = p;
     63 				break;
     64 			}
     65 		}
     66 		r = *(uchar*)s;
     67 		if(r < Runeself)
     68 			n = 1;
     69 		else
     70 			n = chartorune(&r, s);
     71 
     72 		/* switch run lists */
     73 		tl = j->relist[flag];
     74 		tle = j->reliste[flag];
     75 		nl = j->relist[flag^=1];
     76 		nle = j->reliste[flag];
     77 		nl->inst = 0;
     78 
     79 		/* Add first instruction to current list */
     80 		if(match == 0)
     81 			_renewemptythread(tl, progp->startinst, ms, s);
     82 
     83 		/* Execute machine until current list is empty */
     84 		for(tlp=tl; tlp->inst; tlp++){	/* assignment = */
     85 			for(inst = tlp->inst; ; inst = inst->u2.next){
     86 				switch(inst->type){
     87 				case RUNE:	/* regular character */
     88 					if(inst->u1.r == r){
     89 						if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
     90 							return -1;
     91 					}
     92 					break;
     93 				case LBRA:
     94 					tlp->se.m[inst->u1.subid].s.sp = s;
     95 					continue;
     96 				case RBRA:
     97 					tlp->se.m[inst->u1.subid].e.ep = s;
     98 					continue;
     99 				case ANY:
    100 					if(r != '\n')
    101 						if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
    102 							return -1;
    103 					break;
    104 				case ANYNL:
    105 					if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
    106 							return -1;
    107 					break;
    108 				case BOL:
    109 					if(s == bol || *(s-1) == '\n')
    110 						continue;
    111 					break;
    112 				case EOL:
    113 					if(s == j->eol || r == 0 || r == '\n')
    114 						continue;
    115 					break;
    116 				case CCLASS:
    117 					ep = inst->u1.cp->end;
    118 					for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
    119 						if(r >= rp[0] && r <= rp[1]){
    120 							if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
    121 								return -1;
    122 							break;
    123 						}
    124 					break;
    125 				case NCCLASS:
    126 					ep = inst->u1.cp->end;
    127 					for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
    128 						if(r >= rp[0] && r <= rp[1])
    129 							break;
    130 					if(rp == ep)
    131 						if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
    132 							return -1;
    133 					break;
    134 				case OR:
    135 					/* evaluate right choice later */
    136 					if(_renewthread(tlp, inst->u1.right, ms, &tlp->se) == tle)
    137 						return -1;
    138 					/* efficiency: advance and re-evaluate */
    139 					continue;
    140 				case END:	/* Match! */
    141 					match = 1;
    142 					tlp->se.m[0].e.ep = s;
    143 					if(mp != 0)
    144 						_renewmatch(mp, ms, &tlp->se);
    145 					break;
    146 				}
    147 				break;
    148 			}
    149 		}
    150 		if(s == j->eol)
    151 			break;
    152 		checkstart = j->starttype && nl->inst==0;
    153 		s += n;
    154 	}while(r);
    155 	return match;
    156 }
    157 
    158 static int
    159 regexec2(Reprog *progp,	/* program to run */
    160 	char *bol,	/* string to run machine on */
    161 	Resub *mp,	/* subexpression elements */
    162 	int ms,		/* number of elements at mp */
    163 	Reljunk *j
    164 )
    165 {
    166 	int rv;
    167 	Relist *relist0, *relist1;
    168 
    169 	/* mark space */
    170 	relist0 = malloc(BIGLISTSIZE*sizeof(Relist));
    171 	if(relist0 == nil)
    172 		return -1;
    173 	relist1 = malloc(BIGLISTSIZE*sizeof(Relist));
    174 	if(relist1 == nil){
    175 		free(relist1);
    176 		return -1;
    177 	}
    178 	j->relist[0] = relist0;
    179 	j->relist[1] = relist1;
    180 	j->reliste[0] = relist0 + BIGLISTSIZE - 2;
    181 	j->reliste[1] = relist1 + BIGLISTSIZE - 2;
    182 
    183 	rv = regexec1(progp, bol, mp, ms, j);
    184 	free(relist0);
    185 	free(relist1);
    186 	return rv;
    187 }
    188 
    189 extern int
    190 regexec(Reprog *progp,	/* program to run */
    191 	char *bol,	/* string to run machine on */
    192 	Resub *mp,	/* subexpression elements */
    193 	int ms)		/* number of elements at mp */
    194 {
    195 	Reljunk j;
    196 	Relist relist0[LISTSIZE], relist1[LISTSIZE];
    197 	int rv;
    198 
    199 	/*
    200  	 *  use user-specified starting/ending location if specified
    201 	 */
    202 	j.starts = bol;
    203 	j.eol = 0;
    204 	if(mp && ms>0){
    205 		if(mp->s.sp)
    206 			j.starts = mp->s.sp;
    207 		if(mp->e.ep)
    208 			j.eol = mp->e.ep;
    209 	}
    210 	j.starttype = 0;
    211 	j.startchar = 0;
    212 	if(progp->startinst->type == RUNE && progp->startinst->u1.r < Runeself) {
    213 		j.starttype = RUNE;
    214 		j.startchar = progp->startinst->u1.r;
    215 	}
    216 	if(progp->startinst->type == BOL)
    217 		j.starttype = BOL;
    218 
    219 	/* mark space */
    220 	j.relist[0] = relist0;
    221 	j.relist[1] = relist1;
    222 	j.reliste[0] = relist0 + nelem(relist0) - 2;
    223 	j.reliste[1] = relist1 + nelem(relist1) - 2;
    224 
    225 	rv = regexec1(progp, bol, mp, ms, &j);
    226 	if(rv >= 0)
    227 		return rv;
    228 	rv = regexec2(progp, bol, mp, ms, &j);
    229 	if(rv >= 0)
    230 		return rv;
    231 	return -1;
    232 }