42 #include <sphinxbase/pio.h>
43 #include <sphinxbase/strfuncs.h>
50 #define DEFAULT_NUM_PHONE (MAX_S3CIPID+1)
53 #define snprintf sprintf_s
56 extern const char *
const cmu6_lts_phone_table[];
59 dict_ciphone_id(
dict_t * d,
const char *str)
72 assert((wid >= 0) && (wid < d->n_word));
73 assert((pos >= 0) && (pos < d->word[wid].pronlen));
88 E_INFO(
"Reallocating to %d KiB for word entries\n",
98 wordp->
word = (
char *) ckd_salloc(word);
101 wword = ckd_salloc(word);
106 if (hash_table_lookup_int32(d->
ht, wword, &w) < 0) {
107 E_ERROR(
"Missing base word for: %s\n", word);
109 ckd_free(wordp->
word);
126 ckd_free(wordp->
word);
149 dict_read(FILE * fp,
dict_t * d)
157 size_t stralloc, phnalloc;
160 p = (
s3cipid_t *) ckd_calloc(maxwd + 4,
sizeof(*p));
161 wptr = (
char **) ckd_calloc(maxwd,
sizeof(
char *));
164 stralloc = phnalloc = 0;
165 for (li = lineiter_start(fp); li; li = lineiter_next(li)) {
167 if (0 == strncmp(li->buf,
"##", 2)
168 || 0 == strncmp(li->buf,
";;", 2))
171 if ((nwd = str2words(li->buf, wptr, maxwd)) < 0) {
173 nwd = str2words(li->buf, NULL, 0);
176 p = (
s3cipid_t *) ckd_realloc(p, (maxwd + 4) *
sizeof(*p));
177 wptr = (
char **) ckd_realloc(wptr, maxwd *
sizeof(*wptr));
184 E_ERROR(
"Line %d: No pronunciation for word '%s'; ignored\n",
191 for (i = 1; i < nwd; i++) {
192 p[i - 1] = dict_ciphone_id(d, wptr[i]);
193 if (NOT_S3CIPID(p[i - 1])) {
194 E_ERROR(
"Line %d: Phone '%s' is mising in the acoustic model; word '%s' ignored\n",
195 lineno, wptr[i], wptr[0]);
204 (
"Line %d: Failed to add the word '%s' (duplicate?); ignored\n",
207 stralloc += strlen(d->
word[w].
word);
212 E_INFO(
"Dictionary size %d, allocated %d KiB for strings, %d KiB for phones\n",
213 dict_size(d), (
int)stralloc / 1024, (
int)phnalloc / 1024);
226 if ((fh = fopen(filename,
"w")) == NULL) {
227 E_ERROR_SYSTEM(
"Failed to open '%s'", filename);
230 for (i = 0; i < dict->
n_word; ++i) {
235 for (phlen = j = 0; j < dict_pronlen(dict, i); ++j)
237 phones = ckd_calloc(1, phlen);
238 for (j = 0; j < dict_pronlen(dict, i); ++j) {
240 if (j != dict_pronlen(dict, i) - 1)
243 fprintf(fh,
"%-30s %s\n", dict_wordstr(dict, i), phones);
259 char const *dictfile = NULL, *fillerfile = NULL;
262 dictfile = cmd_ln_str_r(config,
"-dict");
263 fillerfile = cmd_ln_str_r(config,
"_fdict");
274 if ((fp = fopen(dictfile,
"r")) == NULL) {
275 E_ERROR_SYSTEM(
"Failed to open dictionary file '%s' for reading", dictfile);
278 for (li = lineiter_start(fp); li; li = lineiter_next(li)) {
279 if (0 != strncmp(li->buf,
"##", 2)
280 && 0 != strncmp(li->buf,
";;", 2))
283 fseek(fp, 0L, SEEK_SET);
288 if ((fp2 = fopen(fillerfile,
"r")) == NULL) {
289 E_ERROR_SYSTEM(
"Failed to open filler dictionary file '%s' for reading", fillerfile);
293 for (li = lineiter_start(fp2); li; li = lineiter_next(li)) {
294 if (0 != strncmp(li->buf,
"##", 2)
295 && 0 != strncmp(li->buf,
";;", 2))
298 fseek(fp2, 0L, SEEK_SET);
308 (n + S3DICT_INC_SZ < MAX_S3WID) ? n + S3DICT_INC_SZ : MAX_S3WID;
309 if (n >= MAX_S3WID) {
310 E_ERROR(
"Number of words in dictionaries (%d) exceeds limit (%d)\n", n,
318 E_INFO(
"Allocating %d * %d bytes (%d KiB) for word entries\n",
327 if (config && cmd_ln_exists_r(config,
"-dictcase"))
328 d->nocase = cmd_ln_boolean_r(config,
"-dictcase");
333 E_INFO(
"Reading main dictionary: %s\n", dictfile);
336 E_INFO(
"%d words read\n", d->
n_word);
340 E_ERROR(
"Remove sentence start word '<s>' from the dictionary\n");
345 E_ERROR(
"Remove sentence start word '</s>' from the dictionary\n");
350 E_ERROR(
"Remove silence word '<sil>' from the dictionary\n");
358 E_INFO(
"Reading filler dictionary: %s\n", fillerfile);
364 sil = bin_mdef_silphone(mdef);
386 E_ERROR(
"Word '%s' must occur (only) in filler dictionary\n",
406 if (hash_table_lookup_int32(d->
ht, word, &w) < 0)
416 assert((w >= 0) && (w < d->n_word));
418 w = dict_basewid(d, w);
430 assert((w >= 0) && (w < d->n_word));
432 w = dict_basewid(d, w);
447 if (word[len - 1] ==
')') {
448 for (i = len - 2; (i > 0) && (word[i] !=
'('); --i);
479 for (i = 0; i < d->
n_word; i++) {
482 ckd_free((
void *) word->
word);
484 ckd_free((
void *) word->
ciphone);
488 ckd_free((
void *) d->
word);
490 hash_table_free(d->
ht);
493 ckd_free((
void *) d);
501 E_INFO_NOFN(
"Initialization of dict_t, report:\n");
502 E_INFO_NOFN(
"Max word: %d\n", d->
max_words);
503 E_INFO_NOFN(
"No of word: %d\n", d->
n_word);