You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
103 lines
3.0 KiB
103 lines
3.0 KiB
/*!
|
|
* \file
|
|
*
|
|
* \brief Various routines that perform random-walk based operations
|
|
on graphs stored as gk_csr_t matrices.
|
|
*
|
|
* \author George Karypis
|
|
* \version\verbatim $Id: rw.c 11078 2011-11-12 00:20:44Z karypis $ \endverbatim
|
|
*/
|
|
|
|
#include <GKlib.h>
|
|
|
|
|
|
/*************************************************************************/
|
|
/*! Computes the (personalized) page-rank of the vertices in a graph.
|
|
|
|
\param mat is the matrix storing the graph.
|
|
\param lamda is the restart probability.
|
|
\param eps is the error tolerance for convergance.
|
|
\param max_niter is the maximum number of allowed iterations.
|
|
\param pr on entry stores the restart distribution of the vertices.
|
|
This allows for the computation of personalized page-rank scores
|
|
by appropriately setting that parameter.
|
|
On return, pr stores the computed page ranks.
|
|
|
|
\returns the number of iterations that were performed.
|
|
*/
|
|
/**************************************************************************/
|
|
int gk_rw_PageRank(gk_csr_t *mat, float lamda, float eps, int max_niter, float *pr)
|
|
{
|
|
ssize_t i, j, k, iter, nrows;
|
|
double *rscale, *prold, *prnew, *prtmp;
|
|
double fromsinks, error;
|
|
ssize_t *rowptr;
|
|
int *rowind;
|
|
float *rowval;
|
|
|
|
nrows = mat->nrows;
|
|
rowptr = mat->rowptr;
|
|
rowind = mat->rowind;
|
|
rowval = mat->rowval;
|
|
|
|
prold = gk_dsmalloc(nrows, 0, "gk_rw_PageRank: prnew");
|
|
prnew = gk_dsmalloc(nrows, 0, "gk_rw_PageRank: prold");
|
|
rscale = gk_dsmalloc(nrows, 0, "gk_rw_PageRank: rscale");
|
|
|
|
/* compute the scaling factors to get adjacency weights into transition
|
|
probabilities */
|
|
for (i=0; i<nrows; i++) {
|
|
for (j=rowptr[i]; j<rowptr[i+1]; j++)
|
|
rscale[i] += rowval[j];
|
|
if (rscale[i] > 0)
|
|
rscale[i] = 1.0/rscale[i];
|
|
}
|
|
|
|
/* the restart distribution is the initial pr scores */
|
|
for (i=0; i<nrows; i++)
|
|
prnew[i] = pr[i];
|
|
|
|
/* get into the PR iteration */
|
|
for (iter=0; iter<max_niter; iter++) {
|
|
gk_SWAP(prnew, prold, prtmp);
|
|
gk_dset(nrows, 0.0, prnew);
|
|
|
|
/* determine the total current PR score of the sinks so that you
|
|
can distribute them to all nodes according to the restart
|
|
distribution. */
|
|
for (fromsinks=0.0, i=0; i<nrows; i++) {
|
|
if (rscale[i] == 0)
|
|
fromsinks += prold[i];
|
|
}
|
|
|
|
/* push random-walk scores to the outlinks */
|
|
for (i=0; i<nrows; i++) {
|
|
for (j=rowptr[i]; j<rowptr[i+1]; j++)
|
|
prnew[rowind[j]] += prold[i]*rscale[i]*rowval[j];
|
|
}
|
|
|
|
/* apply the restart conditions */
|
|
for (i=0; i<nrows; i++) {
|
|
prnew[i] = lamda*(fromsinks*pr[i]+prnew[i]) + (1.0-lamda)*pr[i];
|
|
}
|
|
|
|
/* compute the error */
|
|
for (error=0.0, i=0; i<nrows; i++)
|
|
error = (fabs(prnew[i]-prold[i]) > error ? fabs(prnew[i]-prold[i]) : error);
|
|
|
|
//printf("nrm1: %le maxfabserr: %le\n", gk_dsum(nrows, prnew, 1), error);
|
|
|
|
if (error < eps)
|
|
break;
|
|
}
|
|
|
|
/* store the computed pr scores into pr for output */
|
|
for (i=0; i<nrows; i++)
|
|
pr[i] = prnew[i];
|
|
|
|
gk_free((void **)&prnew, &prold, &rscale, LTERM);
|
|
|
|
return (int)(iter+1);
|
|
|
|
}
|
|
|
|
|