/******************* String Parsing Utilities *************/

/*
** A basic parser offered by David Rothman
** d_rothman@mindspring.com
**
** Comment: if you have a fixed number of fields in each line, put in a
** counter (instead of testing strlen).
*/

/*
** FPARSE - split a string into a string array of tokens
**
** Format:  out = fparse(str);
**
** Input:   str   string, the text to split.  Tokens are pulled off one
**                at a time with TOKEN, so TOKEN's delimiter rules apply.
**
** Output:  out   string array, one token per row, in order of
**                appearance.  When str contains no tokens (empty, or
**                nothing but delimiters) a single empty string is
**                returned.
**
** Remarks: Earlier versions seeded the result with "" and sliced the
**          seed off afterwards, which indexed out of range for an empty
**          input string.  Empty tokens, which TOKEN appears to return
**          once only delimiters remain (e.g. trailing blanks), are no
**          longer stored as a spurious last row.
*/
proc (1) = fparse(str);
    local out, first, rest, found;

    rest = str;
    out = "";
    found = 0;

    do while strlen(rest) > 0;
        { first, rest } = token(rest);

        @ skip the empty token produced when only delimiters were left @
        if strlen(first) > 0;
            if found;
                out = out $| first;
            else;
                out = first;
                found = 1;
            endif;
        endif;
    endo;

    retp(out);
endp;

/*
** A fast, flexible string parser from
**
**   Gary King, King@Harvard.Edu, http://GKing.Harvard.Edu
**   Dept. of Government, Harvard U, Cambridge, MA 02138
**   Direct: (617) 495-2027; Admin.Assistant: 495-9271
**   FAX 496-5149 or 495-0438; Data Center 495-4734
**
** Comment: TOKEN2 is a version of TOKEN that is much faster for repeated
** calls to parse very large strings (such as a data set).  It also has
** more flexibility in defining what is a token.
*/

/*
** TOKEN2.SRC - String parser
**              faster version of the original TOKEN for larger strings,
**              with more flexibility in defining tokens and delimiters
**
** Gary King
**
** Purpose: To extract the first token from a string.
**
** Format:  { token,str_left } = TOKEN2(str);
**
** Input:   str       string, the string to parse.
**
** Output:  token     string, the first token in str.
**
**          str_left  string, the remainder of the input string.
**
** GLOBALS: _tokdel = ascii values of acceptable delimiters
**          _tokwds = ascii values of individual characters taken to be
**                    tokens, regardless of delimiter placement
**                    (the code treats _tokwds = -1 as "none")
*/
declare matrix _tokdel = { 32, 10, 13, 44, 9 };   @ space, lf, cr, comma, tab @
declare matrix _tokwds = { 59, 60, 61, 62 };      @ ; < = > @

proc (2) = token2(str);
    local head, tail, padded, breakers, len, tok, str_left;

    @ characters that end a token: the delimiters plus, unless @
    @ disabled with -1, the single-character tokens            @
    if _tokwds /= -1;
        breakers = _tokdel | _tokwds;
    else;
        breakers = _tokdel;
    endif;

    len = strlen(str);
    if str $== "";
        retp("", "");
    endif;

    @ advance past leading delimiters; nothing but delimiters => no token @
    head = 1;
    do while in(vals(strsect(str, head, 1)), _tokdel, 1);
        head = head + 1;
        if head > len;
            retp("", "");
        endif;
    endo;

    @ a trailing blank guarantees the scan below always finds a breaker @
    padded = str $+ " ";
    tail = head;
    do while not in(vals(strsect(padded, tail, 1)), breakers, 1);
        tail = tail + 1;
    endo;

    @ the scan stopped on its first character and that character is a @
    @ single-character token: the token is that one character         @
    if tail == head and in(vals(strsect(padded, tail, 1)), _tokwds, 1);
        tail = tail + 1;
    endif;

    tok = strsect(str, head, tail - head);
    str_left = strsect(str, tail, len);
    retp(tok, str_left);
endp;