! Tokenise__: split the text in 'buf' into words and look each word up in
! the game dictionary, writing the results to the parse table 'tab'.
!
!   buf  Input buffer. buf-->0 holds the number of characters; the
!        characters themselves are bytes starting at buf+WORDSIZE.
!   tab  Output parse table, filled in as words:
!          tab-->0        number of words found
!          tab-->(3*i+1)  dictionary search result for word i
!          tab-->(3*i+2)  length of word i in characters
!          tab-->(3*i+3)  position of word i, as a byte offset from the
!                         start of the original buffer (WORDSIZE + index)
!
! All remaining names in the header are local scratch variables.
! At most MAX_BUFFER_WORDS words are recorded; later text is ignored.
[ Tokenise__ buf tab
    cx numwords len bx ix wx wpos wlen val res dictlen entrylen;

    len = buf-->0;
    buf = buf+WORDSIZE;   ! from here on, buf addresses the character data

    ! First, split the buffer up into words. We use the standard Infocom
    ! list of word separators (comma, period, double-quote).

    cx = 0;
    numwords = 0;
    while (cx < len) {
        ! Skip the run of spaces before the next word.
        while (cx < len && buf->cx == ' ') cx++;
        if (cx >= len) break;
        bx = cx;
        if (buf->cx == '.' or ',' or '"') {
            ! A separator character is always a one-character word.
            cx++;
        }
        else {
            ! An ordinary word runs up to the next space or separator.
            while (cx < len && buf->cx ~= ' ' or '.' or ',' or '"') cx++;
        }
        tab-->(numwords*3+2) = (cx-bx);
        tab-->(numwords*3+3) = WORDSIZE+bx;
        numwords++;
        if (numwords >= MAX_BUFFER_WORDS) break;
    }
    tab-->0 = numwords;

    ! Now we look each word up in the dictionary.

    dictlen = #dictionary_table-->0;
    entrylen = DICT_WORD_SIZE + 7;

    for (wx=0 : wx<numwords : wx++) {
        wlen = tab-->(wx*3+2);
        wpos = tab-->(wx*3+3);

        ! Copy the word into the gg_tokenbuf array, clipping to DICT_WORD_SIZE
        ! characters and lower case.
        if (wlen > DICT_WORD_SIZE) wlen = DICT_WORD_SIZE;
        cx = wpos - WORDSIZE;   ! undo the offset stored in the table
        ! Glk selector $00A0 performs the lower-casing of each character.
        for (ix=0 : ix<wlen : ix++)
            gg_tokenbuf->ix = glk($00A0, buf->(cx+ix));
        ! Zero-pad the key to the full DICT_WORD_SIZE bytes ('ix' deliberately
        ! carries over from the loop above).
        for (: ix<DICT_WORD_SIZE : ix++)
            gg_tokenbuf->ix = 0;

        ! Search the dictionary entries, which begin one word past the
        ! entry count at the start of the dictionary table.
        ! NOTE(review): per the Glulx spec the trailing operands are
        ! KeyOffset=1 and Options=1 (key passed by address), and the
        ! result is 0 when no entry matches -- confirm against the spec.
        val = #dictionary_table + WORDSIZE;
        @binarysearch gg_tokenbuf DICT_WORD_SIZE val entrylen dictlen 1 1 res;
        tab-->(wx*3+1) = res;
    }
];