1: 
  2: #include <stdio.h>
  3: #include <string.h>
  4: #include <stdlib.h>
  5: static char __file__[] = __FILE__;      /* for tassert.h                */
  6: #include        "tassert.h"
  7: 
  8: #if __sun&&__SVR4
  9: #include <alloca.h>
 10: #elif _MSC_VER
 11: #include <malloc.h>
 12: #endif
 13: 
 14: #include "speller.h"
 15: 
 16: const char idchars[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_";
 17: 
 18: /**************************************************
 19:  * Looks for correct spelling.
 20:  * Currently only looks a 'distance' of one from the seed[].
 21:  * This does an exhaustive search, so can potentially be very slow.
 22:  * Input:
 23:  *      seed            wrongly spelled word
 24:  *      fp              search function
 25:  *      fparg           argument to search function
 26:  *      charset         character set
 27:  * Returns:
 28:  *      NULL            no correct spellings found
 29:  *      void*           value returned by fp() for first possible correct spelling
 30:  */
 31: 
 32: void *spellerY(const char *seed, size_t seedlen, fp_speller_t fp, void *fparg,
 33:         const char *charset, size_t index)
 34: {
 35:     if (!seedlen)
 36:         return NULL;
 37:     assert(seed[seedlen] == 0);
 38: 
 39:     char tmp[30];
 40:     char *buf;
 41:     if (seedlen <= sizeof(tmp) - 2)
 42:         buf = tmp;
 43:     else
 44:     {
 45:         buf = (char *)alloca(seedlen + 2);    // leave space for extra char
warning C6255: _alloca indicates failure by raising a stack overflow exception. Consider using _malloca instead
46: if (!buf) 47: return NULL; // no matches 48: } 49: 50: memcpy(buf, seed, index); 51: 52: /* Delete at seed[index] */ 53: if (index < seedlen) 54: { 55: memcpy(buf + index, seed + index + 1, seedlen - index); 56: assert(buf[seedlen - 1] == 0);
warning C6385: Invalid data: accessing 'buf', the readable size is 'index' bytes, but '1000' bytes might be read: Lines: 35, 37, 39, 40, 41, 45, 46, 50, 53, 55, 56
57: void *p = (*fp)(fparg, buf); 58: if (p) 59: return p; 60: } 61: 62: if (charset && *charset) 63: { 64: /* Substitutions */ 65: if (index < seedlen) 66: { 67: memcpy(buf, seed, seedlen + 1); 68: for (const char *s = charset; *s; s++) 69: { 70: buf[index] = *s; 71: 72: //printf("sub buf = '%s'\n", buf); 73: void *p = (*fp)(fparg, buf); 74: if (p) 75: return p; 76: } 77: assert(buf[seedlen] == 0); 78: } 79: 80: /* Insertions */ 81: memcpy (buf + index + 1, seed + index, seedlen + 1 - index); 82: 83: for (const char *s = charset; *s; s++) 84: { 85: buf[index] = *s; 86: 87: //printf("ins buf = '%s'\n", buf); 88: void *p = (*fp)(fparg, buf); 89: if (p) 90: return p; 91: } 92: assert(buf[seedlen + 1] == 0); 93: } 94: 95: return NULL; // didn't find any corrections 96: } 97: 98: void *spellerX(const char *seed, size_t seedlen, fp_speller_t fp, void *fparg, 99: const char *charset, int flag) 100: { 101: if (!seedlen) 102: return NULL; 103: 104: char tmp[30]; 105: char *buf; 106: if (seedlen <= sizeof(tmp) - 2) 107: buf = tmp; 108: else 109: { 110: buf = (char *)alloca(seedlen + 2); // leave space for extra char
warning C6255: _alloca indicates failure by raising a stack overflow exception. Consider using _malloca instead
111: if (!buf) 112: return NULL; // no matches 113: } 114: 115: /* Deletions */ 116: memcpy(buf, seed + 1, seedlen); 117: for (int i = 0; i < seedlen; i++)
warning C4018: '<' : signed/unsigned mismatch
118: { 119: //printf("del buf = '%s'\n", buf); 120: void *p; 121: if (flag) 122: p = spellerY(buf, seedlen - 1, fp, fparg, charset, i); 123: else 124: p = (*fp)(fparg, buf); 125: if (p) 126: return p; 127: 128: buf[i] = seed[i]; 129: } 130: 131: /* Transpositions */ 132: if (!flag) 133: { 134: memcpy(buf, seed, seedlen + 1); 135: for (int i = 0; i + 1 < seedlen; i++)
warning C4018: '<' : signed/unsigned mismatch
136: { 137: // swap [i] and [i + 1] 138: buf[i] = seed[i + 1]; 139: buf[i + 1] = seed[i]; 140: 141: //printf("tra buf = '%s'\n", buf); 142: void *p = (*fp)(fparg, buf); 143: if (p) 144: return p; 145: 146: buf[i] = seed[i]; 147: } 148: } 149: 150: if (charset && *charset) 151: { 152: /* Substitutions */ 153: memcpy(buf, seed, seedlen + 1); 154: for (int i = 0; i < seedlen; i++)
warning C4018: '<' : signed/unsigned mismatch
155: { 156: for (const char *s = charset; *s; s++) 157: { 158: buf[i] = *s; 159: 160: //printf("sub buf = '%s'\n", buf); 161: void *p; 162: if (flag) 163: p = spellerY(buf, seedlen, fp, fparg, charset, i + 1); 164: else 165: p = (*fp)(fparg, buf); 166: if (p) 167: return p; 168: } 169: buf[i] = seed[i]; 170: } 171: 172: /* Insertions */ 173: memcpy(buf + 1, seed, seedlen + 1); 174: for (int i = 0; i <= seedlen; i++) // yes, do seedlen+1 iterations
warning C4018: '<=' : signed/unsigned mismatch
175: { 176: for (const char *s = charset; *s; s++) 177: { 178: buf[i] = *s; 179: 180: //printf("ins buf = '%s'\n", buf); 181: void *p; 182: if (flag) 183: p = spellerY(buf, seedlen + 1, fp, fparg, charset, i + 1); 184: else 185: p = (*fp)(fparg, buf); 186: if (p) 187: return p; 188: } 189: buf[i] = seed[i]; // going past end of seed[] is ok, as we hit the 0 190: } 191: } 192: 193: return NULL; // didn't find any corrections 194: } 195: 196: void *speller(const char *seed, fp_speller_t fp, void *fparg, const char *charset) 197: { 198: size_t seedlen = strlen(seed); 199: for (int distance = 0; distance < 2; distance++) 200: { void *p = spellerX(seed, seedlen, fp, fparg, charset, distance); 201: if (p) 202: return p; 203: // if (seedlen > 10) 204: // break; 205: } 206: return NULL; // didn't find it 207: } 208: 209: 210: #if UNITTEST 211: 212: #include <stdio.h> 213: #include <string.h> 214: static char __file__[] = __FILE__; /* for tassert.h */ 215: #include "tassert.h" 216: 217: void *speller_test(void *fparg, const char *s) 218: { 219: //printf("speller_test(%s, %s)\n", fparg, s); 220: if (strcmp((char *)fparg, s) == 0) 221: return fparg; 222: return NULL; 223: } 224: 225: void unittest_speller() 226: { 227: static const char *cases[][3] = 228: { 229: { "hello", "hell", "y" }, 230: { "hello", "hel", "y" }, 231: { "hello", "ello", "y" }, 232: { "hello", "llo", "y" }, 233: { "hello", "hellox", "y" }, 234: { "hello", "helloxy", "y" }, 235: { "hello", "xhello", "y" }, 236: { "hello", "xyhello", "y" }, 237: { "hello", "ehllo", "y" }, 238: { "hello", "helol", "y" }, 239: { "hello", "abcd", "n" }, 240: //{ "ehllo", "helol", "y" }, 241: { "hello", "helxxlo", "y" }, 242: { "hello", "ehlxxlo", "n" }, 243: { "hello", "heaao", "y" }, 244: { "_123456789_123456789_123456789_123456789", "_123456789_123456789_123456789_12345678", "y" }, 245: }; 246: //printf("unittest_speller()\n"); 247: const void *p = speller("hello", &speller_test, (void *)"hell", idchars); 248: assert(p != NULL); 249: for (int i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) 250: { 251: //printf("case [%d]\n", i); 252: void *p = speller(cases[i][0], &speller_test, (void *)cases[i][1], idchars); 253: if (p) 254: assert(cases[i][2][0] == 'y'); 255: else 256: assert(cases[i][2][0] == 'n'); 257: } 258: //printf("unittest_speller() success\n"); 259: } 260: 261: #endif 262: