src/stats/cst_viterbi.c - third_party/flite - Git at Google

 /*************************************************************************/
 /*                                                                       */
 /*                  Language Technologies Institute                      */
 /*                     Carnegie Mellon University                        */
 /*                        Copyright (c) 2000                             */
 /*                        All Rights Reserved.                           */
 /*                                                                       */
 /*  Permission is hereby granted, free of charge, to use and distribute  */
 /*  this software and its documentation without restriction, including   */
 /*  without limitation the rights to use, copy, modify, merge, publish,  */
 /*  distribute, sublicense, and/or sell copies of this work, and to      */
 /*  permit persons to whom this work is furnished to do so, subject to   */
 /*  the following conditions:                                            */
 /*   1. The code must retain the above copyright notice, this list of    */
 /*      conditions and the following disclaimer.                         */
 /*   2. Any modifications must be clearly marked as such.                */
 /*   3. Original authors' names are not deleted.                         */
 /*   4. The authors' names are not used to endorse or promote products   */
 /*      derived from this software without specific prior written        */
 /*      permission.                                                      */
 /*                                                                       */
 /*  CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK         */
 /*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      */
 /*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */
 /*  SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE      */
 /*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    */
 /*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   */
 /*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          */
 /*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       */
 /*  THIS SOFTWARE.                                                       */
 /*                                                                       */
 /*************************************************************************/
 /*             Author:  Alan W Black ([email protected])                    */
 /*               Date:  August 2000                                      */
 /*************************************************************************/
 /*                                                                       */
 /*  A Viterbi decoder                                                    */
 /*                                                                       */
 /*************************************************************************/

 #include <limits.h>
 #include "cst_viterbi.h"

 static void vit_point_init_path_array(cst_vit_point *n,int num_states);
 static void vit_point_init_dynamic_path_array(cst_vit_point *n,
 					      cst_vit_cand *c);
 static void vit_add_paths(cst_viterbi *vd,
 			  cst_vit_point *point,
 			  cst_vit_path *path);
 static void vit_add_path(cst_viterbi *vd,cst_vit_point *p, cst_vit_path *np);
 static cst_vit_path *find_best_path(cst_viterbi *vd);
 static int betterthan(cst_viterbi *v,int a, int b);

 cst_vit_cand *new_vit_cand()
 {
     cst_vit_cand *c = cst_alloc(struct cst_vit_cand_struct,1);

     return c;
 }

 void vit_cand_set(cst_vit_cand *vc, cst_val *val)
 {
 	if (vc->val)
 		delete_val(vc->val);
 	vc->val = val;
 	val_inc_refcount(vc->val);
 }

 void vit_cand_set_int(cst_vit_cand *vc, int ival)
 {
 	vc->ival = ival;
 	vit_cand_set(vc, int_val(ival));
 }

 void delete_vit_cand(cst_vit_cand *vc)
 {
     if (vc)
     {
 	delete_val(vc->val);
 	delete_vit_cand(vc->next);
 	cst_free(vc);
     }
 }

 cst_vit_path *new_vit_path()
 {
     cst_vit_path *p = cst_alloc(struct cst_vit_path_struct,1);

     return p;
 }

 void delete_vit_path(cst_vit_path *vp)
 {
     if (vp)
     {
 	if (vp->f)
 	    delete_features(vp->f);
 	delete_vit_path(vp->next);
 	cst_free(vp);
     }
 }

 cst_vit_point *new_vit_point()
 {
     cst_vit_point *p = cst_alloc(struct cst_vit_point_struct,1);

     return p;
 }

 void delete_vit_point(cst_vit_point *vp)
 {
     int i;

     if (vp)
     {
 	if (vp->paths)
 	    delete_vit_path(vp->paths);
 	if (vp->num_states != 0)
 	{
 	    for (i=0; i<vp->num_states; i++)
 		if (vp->state_paths[i])
 		    delete_vit_path(vp->state_paths[i]);
 	    cst_free(vp->state_paths);
 	}
 	delete_vit_cand(vp->cands);
 	delete_vit_point(vp->next);
 	cst_free(vp);
     }
 }

 cst_viterbi *new_viterbi(cst_vit_cand_f_t *cand_func,
 			 cst_vit_path_f_t *path_func)
 {
     cst_viterbi *v = cst_alloc(struct cst_viterbi_struct,1);

     v->cand_func = cand_func;
     v->path_func = path_func;
     v->f = new_features();
     return v;
 }

 void delete_viterbi(cst_viterbi *vd)
 {
     if (vd)
     {
 	delete_vit_point(vd->timeline);
 	delete_features(vd->f);
 	cst_free(vd);
     }

     return;
 }

 void viterbi_initialise(cst_viterbi *vd,cst_relation *r)
 {
     cst_item *i;
     cst_vit_point *last = 0;
     cst_vit_point *n = 0;

     /* Construct the timeline with points for each item in relation */
     /* initiallising the state tables at each point                 */
     for (i=relation_head(r); TRUE ; i=item_next(i))
     {
 	n = new_vit_point();
 	n->item = i;
 	if (vd->num_states > 0) /* a state based viterbi search */
 	    vit_point_init_path_array(n,vd->num_states);
 	if (last)
 	    last->next = n;
 	else
 	    vd->timeline = n;
 	last = n;
 	/* we need an extra one (even if there are no items) */
 	/* so we go one past end of the relation             */
 	if (i == 0)
 	{
 	    vd->last_point = n;
 	    break;
 	}
     }

     if (vd->num_states == 0)  /* its a general beam search */
 	vd->timeline->paths = new_vit_path();
     if (vd->num_states == -1) /* Dynamic number of states (# cands) */
 	vit_point_init_path_array(vd->timeline,1);

 }

 static void vit_point_init_path_array(cst_vit_point *n,int num_states)
 {
     n->num_states = num_states;
     n->state_paths = cst_alloc(cst_vit_path*,num_states);
 }

 static void vit_point_init_dynamic_path_array(cst_vit_point *n,
 					      cst_vit_cand *c)
 {
     int i;
     cst_vit_cand *cc;

     for (cc=c,i=0; cc; i++,cc=cc->next)
 	cc->pos = i;

     vit_point_init_path_array(n,i);
 }

 void viterbi_decode(cst_viterbi *vd)
 {
     cst_vit_point *p;
     cst_vit_path *np;
     cst_vit_cand *c;
     int i;

     /* For each time point */
     for (p=vd->timeline; p->next != NULL; p=p->next)
     {
 	/* Find the candidates */
 	p->cands = (*vd->cand_func)(p->item,vd);
 	if (vd->num_states != 0)  /* true viterbi */
 	{
 	    if (vd->num_states == -1)  /* dynamic number of states (# cands) */
 		vit_point_init_dynamic_path_array(p->next,p->cands);
 	    for (i=0; i<p->num_states; i++)
 	    {

 		if (((p == vd->timeline) && i==0) ||
 		    (p->state_paths[i] != 0))
 		    for (c=p->cands; c; c=c->next)
 		    {
 			np = (*vd->path_func)(p->state_paths[i],c,vd);
 			vit_add_paths(vd,p->next,np);
 		    }
 	    }

 	}
 	else                      /* general beam search */
 	{
 	    cst_errmsg("viterbi, general beam search not implemented\n");
 /*
 	    for (t=p->paths; t; t=t->next)
 	    {
 		for (c=p->cands; c; c=c->next)
 		{
 		    np = (vd->path_func)(t,c,vd);
 		    add_path(p->next,np);
 		}
 	    }
 */
 	}
     }
 }

 static void vit_add_paths(cst_viterbi *vd,
 			  cst_vit_point *point,
 			  cst_vit_path *path)
 {
     /* Add a list of paths at point */
     cst_vit_path *p, *next_p;

     for (p=path; p; p=next_p)
     {
 	next_p = p->next; /* as p could be deleted is not required */
 	vit_add_path(vd,point,p);
     }
 }

 static void vit_add_path(cst_viterbi *vd,cst_vit_point *p, cst_vit_path *np)
 {
     if (p->state_paths[np->state] == 0)
     {   /* we don't have one yet so this is best */
 	p->state_paths[np->state] = np;
     }
     else if (betterthan(vd,np->score,p->state_paths[np->state]->score))
     {   /* its better than what we have already */
 	delete_vit_path(p->state_paths[np->state]);
 	p->state_paths[np->state] = np;
     }
     else
 	delete_vit_path(np);
 }

 int viterbi_result(cst_viterbi *vd,const char *n)
 {
     /* Find best path through the decoder, adding field to item named n */
     /* with choising value                                              */
     cst_vit_path *p;

     if ((vd->timeline == 0) || (vd->timeline->next == 0))
 	return TRUE;  /* it has succeeded in the null case */
     p = find_best_path(vd);
     if (p == NULL)    /* bummer */
 	return FALSE;

     for (; p; p=p->from)
 	if (p->cand)
 	{
 #if 1
 	    item_set_int(p->cand->item,"cl_total_score",p->score);
 	    item_set_int(p->cand->item,"cl_cand_score",p->cand->score);
 #endif
 	    item_set(p->cand->item,n,p->cand->val);
 	}
     return TRUE;
 }

 void viterbi_copy_feature(cst_viterbi *vd,const char *featname)
 {
     /* copy a feature from the best path to related item */
     cst_vit_path *p;

     p = find_best_path(vd);
     if (p == NULL)    /* nothing to copy, emtpy stream or no solution */
 	return;

     for (; p; p=p->from)
 	if (p->cand && feat_present(p->f, featname))
 	    item_set(p->cand->item,featname,feat_val(p->f,featname));
     return;
 }

 static cst_vit_path *find_best_path(cst_viterbi *vd)
 {
     cst_vit_point *t;
     int best,worst;
     cst_vit_path *best_p=NULL;
     int i;

     if (vd->big_is_good)
 	worst = -INT_MAX;
     else
 	worst = INT_MAX;
     best = worst;  /* though we should be able to find something better */

     t = vd->last_point;

     if (vd->num_states != 0)
     {
 	for (i=0; i<t->num_states; i++)
 	{
 	    if ((t->state_paths[i] != NULL) &&
 		(betterthan(vd,t->state_paths[i]->score, best)))
 	    {
 		best = t->state_paths[i]->score;
 		best_p = t->state_paths[i];
 	    }
 	}
     }

     return best_p;
 }

 static int betterthan(cst_viterbi *v,int a, int b)
 {
     /* better may be most big or most small */
     /* for probabalities big is good */

     if (v->big_is_good)
 	return (a > b);
     else
 	return (a < b);
 }
	/*************************************************************************/
	/* */
	/* Language Technologies Institute */
	/* Carnegie Mellon University */
	/* Copyright (c) 2000 */
	/* All Rights Reserved. */
	/* */
	/* Permission is hereby granted, free of charge, to use and distribute */
	/* this software and its documentation without restriction, including */
	/* without limitation the rights to use, copy, modify, merge, publish, */
	/* distribute, sublicense, and/or sell copies of this work, and to */
	/* permit persons to whom this work is furnished to do so, subject to */
	/* the following conditions: */
	/* 1. The code must retain the above copyright notice, this list of */
	/* conditions and the following disclaimer. */
	/* 2. Any modifications must be clearly marked as such. */
	/* 3. Original authors' names are not deleted. */
	/* 4. The authors' names are not used to endorse or promote products */
	/* derived from this software without specific prior written */
	/* permission. */
	/* */
	/* CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK */
	/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
	/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
	/* SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE */
	/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
	/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
	/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
	/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
	/* THIS SOFTWARE. */
	/* */
	/*************************************************************************/
	/* Author: Alan W Black ([email protected]) */
	/* Date: August 2000 */
	/*************************************************************************/
	/* */
	/* A Viterbi decoder */
	/* */
	/*************************************************************************/

	#include <limits.h>
	#include "cst_viterbi.h"

	static void vit_point_init_path_array(cst_vit_point *n,int num_states);
	static void vit_point_init_dynamic_path_array(cst_vit_point *n,
	cst_vit_cand *c);
	static void vit_add_paths(cst_viterbi *vd,
	cst_vit_point *point,
	cst_vit_path *path);
	static void vit_add_path(cst_viterbi vd,cst_vit_point p, cst_vit_path *np);
	static cst_vit_path find_best_path(cst_viterbi vd);
	static int betterthan(cst_viterbi *v,int a, int b);

	cst_vit_cand *new_vit_cand()
	{
	cst_vit_cand *c = cst_alloc(struct cst_vit_cand_struct,1);

	return c;
	}

	void vit_cand_set(cst_vit_cand vc, cst_val val)
	{
	if (vc->val)
	delete_val(vc->val);
	vc->val = val;
	val_inc_refcount(vc->val);
	}

	void vit_cand_set_int(cst_vit_cand *vc, int ival)
	{
	vc->ival = ival;
	vit_cand_set(vc, int_val(ival));
	}

	void delete_vit_cand(cst_vit_cand *vc)
	{
	if (vc)
	{
	delete_val(vc->val);
	delete_vit_cand(vc->next);
	cst_free(vc);
	}
	}

	cst_vit_path *new_vit_path()
	{
	cst_vit_path *p = cst_alloc(struct cst_vit_path_struct,1);

	return p;
	}

	void delete_vit_path(cst_vit_path *vp)
	{
	if (vp)
	{
	if (vp->f)
	delete_features(vp->f);
	delete_vit_path(vp->next);
	cst_free(vp);
	}
	}

	cst_vit_point *new_vit_point()
	{
	cst_vit_point *p = cst_alloc(struct cst_vit_point_struct,1);

	return p;
	}

	void delete_vit_point(cst_vit_point *vp)
	{
	int i;

	if (vp)
	{
	if (vp->paths)
	delete_vit_path(vp->paths);
	if (vp->num_states != 0)
	{
	for (i=0; i<vp->num_states; i++)
	if (vp->state_paths[i])
	delete_vit_path(vp->state_paths[i]);
	cst_free(vp->state_paths);
	}
	delete_vit_cand(vp->cands);
	delete_vit_point(vp->next);
	cst_free(vp);
	}
	}

	cst_viterbi new_viterbi(cst_vit_cand_f_t cand_func,
	cst_vit_path_f_t *path_func)
	{
	cst_viterbi *v = cst_alloc(struct cst_viterbi_struct,1);

	v->cand_func = cand_func;
	v->path_func = path_func;
	v->f = new_features();
	return v;
	}

	void delete_viterbi(cst_viterbi *vd)
	{
	if (vd)
	{
	delete_vit_point(vd->timeline);
	delete_features(vd->f);
	cst_free(vd);
	}

	return;
	}

	void viterbi_initialise(cst_viterbi vd,cst_relation r)
	{
	cst_item *i;
	cst_vit_point *last = 0;
	cst_vit_point *n = 0;

	/* Construct the timeline with points for each item in relation */
	/* initiallising the state tables at each point */
	for (i=relation_head(r); TRUE ; i=item_next(i))
	{
	n = new_vit_point();
	n->item = i;
	if (vd->num_states > 0) /* a state based viterbi search */
	vit_point_init_path_array(n,vd->num_states);
	if (last)
	last->next = n;
	else
	vd->timeline = n;
	last = n;
	/* we need an extra one (even if there are no items) */
	/* so we go one past end of the relation */
	if (i == 0)
	{
	vd->last_point = n;
	break;
	}
	}

	if (vd->num_states == 0) /* its a general beam search */
	vd->timeline->paths = new_vit_path();
	if (vd->num_states == -1) /* Dynamic number of states (# cands) */
	vit_point_init_path_array(vd->timeline,1);

	}

	static void vit_point_init_path_array(cst_vit_point *n,int num_states)
	{
	n->num_states = num_states;
	n->state_paths = cst_alloc(cst_vit_path*,num_states);
	}

	static void vit_point_init_dynamic_path_array(cst_vit_point *n,
	cst_vit_cand *c)
	{
	int i;
	cst_vit_cand *cc;

	for (cc=c,i=0; cc; i++,cc=cc->next)
	cc->pos = i;

	vit_point_init_path_array(n,i);
	}

	void viterbi_decode(cst_viterbi *vd)
	{
	cst_vit_point *p;
	cst_vit_path *np;
	cst_vit_cand *c;
	int i;

	/* For each time point */
	for (p=vd->timeline; p->next != NULL; p=p->next)
	{
	/* Find the candidates */
	p->cands = (*vd->cand_func)(p->item,vd);
	if (vd->num_states != 0) /* true viterbi */
	{
	if (vd->num_states == -1) /* dynamic number of states (# cands) */
	vit_point_init_dynamic_path_array(p->next,p->cands);
	for (i=0; i<p->num_states; i++)
	{

	if (((p == vd->timeline) && i==0) \|\|
	(p->state_paths[i] != 0))
	for (c=p->cands; c; c=c->next)
	{
	np = (*vd->path_func)(p->state_paths[i],c,vd);
	vit_add_paths(vd,p->next,np);
	}
	}

	}
	else /* general beam search */
	{
	cst_errmsg("viterbi, general beam search not implemented\n");
	/*
	for (t=p->paths; t; t=t->next)
	{
	for (c=p->cands; c; c=c->next)
	{
	np = (vd->path_func)(t,c,vd);
	add_path(p->next,np);
	}
	}
	*/
	}
	}
	}

	static void vit_add_paths(cst_viterbi *vd,
	cst_vit_point *point,
	cst_vit_path *path)
	{
	/* Add a list of paths at point */
	cst_vit_path p, next_p;

	for (p=path; p; p=next_p)
	{
	next_p = p->next; /* as p could be deleted is not required */
	vit_add_path(vd,point,p);
	}
	}

	static void vit_add_path(cst_viterbi vd,cst_vit_point p, cst_vit_path *np)
	{
	if (p->state_paths[np->state] == 0)
	{ /* we don't have one yet so this is best */
	p->state_paths[np->state] = np;
	}
	else if (betterthan(vd,np->score,p->state_paths[np->state]->score))
	{ /* its better than what we have already */
	delete_vit_path(p->state_paths[np->state]);
	p->state_paths[np->state] = np;
	}
	else
	delete_vit_path(np);
	}

	int viterbi_result(cst_viterbi vd,const char n)
	{
	/* Find best path through the decoder, adding field to item named n */
	/* with choising value */
	cst_vit_path *p;

	if ((vd->timeline == 0) \|\| (vd->timeline->next == 0))
	return TRUE; /* it has succeeded in the null case */
	p = find_best_path(vd);
	if (p == NULL) /* bummer */
	return FALSE;

	for (; p; p=p->from)
	if (p->cand)
	{
	#if 1
	item_set_int(p->cand->item,"cl_total_score",p->score);
	item_set_int(p->cand->item,"cl_cand_score",p->cand->score);
	#endif
	item_set(p->cand->item,n,p->cand->val);
	}
	return TRUE;
	}

	void viterbi_copy_feature(cst_viterbi vd,const char featname)
	{
	/* copy a feature from the best path to related item */
	cst_vit_path *p;

	p = find_best_path(vd);
	if (p == NULL) /* nothing to copy, emtpy stream or no solution */
	return;

	for (; p; p=p->from)
	if (p->cand && feat_present(p->f, featname))
	item_set(p->cand->item,featname,feat_val(p->f,featname));
	return;
	}

	static cst_vit_path find_best_path(cst_viterbi vd)
	{
	cst_vit_point *t;
	int best,worst;
	cst_vit_path *best_p=NULL;
	int i;

	if (vd->big_is_good)
	worst = -INT_MAX;
	else
	worst = INT_MAX;
	best = worst; /* though we should be able to find something better */

	t = vd->last_point;

	if (vd->num_states != 0)
	{
	for (i=0; i<t->num_states; i++)
	{
	if ((t->state_paths[i] != NULL) &&
	(betterthan(vd,t->state_paths[i]->score, best)))
	{
	best = t->state_paths[i]->score;
	best_p = t->state_paths[i];
	}
	}
	}

	return best_p;
	}

	static int betterthan(cst_viterbi *v,int a, int b)
	{
	/* better may be most big or most small */
	/* for probabalities big is good */

	if (v->big_is_good)
	return (a > b);
	else
	return (a < b);
	}