/* covmat.c (compute covariance matrix) 15-16mar98 jw */ #include #include #include #include #include #define PROG_GROUP "Jiri's matrix utilities" #define PROG_NAME "covmat" #define PROG_VERSION "1.00" #define SYSERRMSG strerror( errno ) #define EQ( p, q ) ( strcmp( (p),(q) ) == 0 ) #define LINEMAX 2048 /* function prototypes */ extern void error( int, ... ) __attribute__((noreturn)); static void print_help( void ) __attribute__((noreturn)); static void print_version( void ) __attribute__((noreturn)); static void read_args( int, char ** ); static double *read_matrix( FILE *, int [], char ** ); static void print_matrix( FILE *, char *, int, int [], double * ); static void on_fpe( int ); static void center_rows( double *, int, int ); static void covariance( double *, int, int, int, double * ); static void cov_to_cor( double *, int, int ); /* global data */ static char *helpinfo = "usage:\n" " %s [options] [infile [outfile]]\n" "options:\n" " -m don't subtract mean value\n" " -o fmt numeric format string (default: `%%le' or $OFMT)\n" " -r transform to correlation matrix\n" " -F disable SIGFPE handler\n" " -H prepend heading line to output\n" " -N m,n matrix size m rows x n columns\n" " -T output is triangular matrix\n" ; char *progname = NULL; static char *ifname = "-"; /* input file name, default = stdin */ static char *ofname = "-"; /* output file name,default = stdout */ static char *outfmt = "%le"; /* numeric output format string */ static int nomean = 0; /* no mean subtraction flag */ static int headln = 0; /* heading line request flag */ static int nocatchfpe = 0; /* don't use SIGFPE handler */ static int corr_out = 0; /* correlation request flag */ static int tri_out = 0; /* output is triangular matrix */ static int n_rows = 0; /* matrix size, unknown at startup */ static int n_columns = 0; /* matrix size, unknown at startup */ /* code starts here */ void main( int argc, char **argv ) { FILE *ifile, *ofile; double *data, *cov; int nn[2]; char *msg; read_args( argc, argv ); if ( !nocatchfpe ) signal( SIGFPE, on_fpe ); /* open input/output files */ ifile = ( ifname[0] == '-' ) ? stdin : fopen( ifname, "rt" ); if ( !ifile ) error( 1, ifname, SYSERRMSG, 0 ); ofile = ( ofname[0] == '-' ) ? stdout : fopen( ofname, "wt" ); if ( !ofile ) error( 1, ofname, SYSERRMSG, 0 ); /* read input data */ nn[0] = n_rows; nn[1] = n_columns; data = read_matrix( ifile, nn, &msg ); if ( !data ) error( 1, msg, 0 ); fclose( ifile ); /* compute matrix */ cov = calloc( nn[1] * nn[1], sizeof( double )); if ( !cov ) error( 1, SYSERRMSG, 0 ); if ( !nomean ) center_rows( data, nn[0], nn[1] ); covariance( data, nn[0], nn[1], tri_out, cov ); if ( corr_out ) cov_to_cor( cov, nn[1], tri_out ); /* write output */ nn[0] = nn[1]; if ( headln ) fprintf( ofile, "%d %d\n", nn[0], nn[1] ); print_matrix( ofile, outfmt, tri_out, nn, cov ); fclose( ofile ); /* cleanup */ free( cov ); free( data ); exit( 0 ); } void print_help( void ) { fprintf( stderr, helpinfo, progname ); exit( 0 ); } void print_version( void ) { fprintf( stderr, "%s: %s version %s\n", PROG_GROUP, PROG_NAME, PROG_VERSION ); exit( 0 ); } void read_args( int argc, char **argv ) { char *argp; if ( argp = getenv( "OFMT" )) outfmt = argp; progname = *argv++; while (( argp = *argv ) && argp[0] == '-' ) { if ( EQ( argp, "--help" ) || EQ( argp, "-?" )) print_help(); if EQ( argp, "--version" ) print_version(); if EQ( argp, "-m" ) { nomean = 1; argv += 1; } else if EQ( argp, "-o" ) if ( outfmt = argv[1] ) argv += 2; else goto noarg; else if EQ( argp, "-r" ) { corr_out = 1; argv += 1; } else if EQ( argp, "-F" ) { nocatchfpe = 1; argv += 1; } else if EQ( argp, "-H" ) { headln = 1; argv += 1; } else if EQ( argp, "-N" ) { if ( argv[1] ) if ( sscanf( argv[1], "%i,%i", &n_rows, &n_columns ) == 2 && n_rows > 0 && n_columns > 0 ) argv += 2; else error( 1, "invalid parameter", argp, 0 ); else goto noarg; } else if EQ( argp, "-T" ) { tri_out = 1; argv += 1; } else if ( argp[1] == 0 ) /* `-' indicates default input (stdin) */ break; else error( 1, "unknown option", argp, 0 ); } /* while */ if ( *argv ) ifname = *argv++; if ( *argv ) ofname = *argv; return; noarg: error( 1, "missing parameter", argp, 0 ); } double *read_matrix( FILE *f, int np[], char **msgp ) { int m, n, i, j, ok = 0; double *matp = NULL, *rowp; char line[LINEMAX] = ""; char *lp, *msg = NULL; m = np[0]; n = np[1]; if ( m == 0 || n == 0 ) { /* get matrix size */ fgets( line, sizeof line, f ); if ( sscanf( line, " %i %i", &m, &n ) < 2 ) { msg = "matrix size unknown"; goto done; } } /* allocate matrix buffer */ if ( m < 1 || n < 1 ) { msg = "invalid matrix size"; goto done; } matp = calloc( m * n, sizeof( double )); if ( !matp ) { msg = SYSERRMSG; goto done; } /* now read matrix data */ rowp = matp; for ( i = 0; i < m; i++ ) { if ( lp = fgets( line, sizeof line, f )) { for ( j = 0; j < n; j++ ) if ( *lp == '\n' ) { msg = "unexpected eol"; goto done; } else rowp[j] = strtod( lp, &lp ); } else { msg = "unexpected eof"; goto done; } rowp += n; } np[0] = m; np[1] = n; ok = 1; done: if ( msgp ) *msgp = msg; return ok ? matp : NULL; } void print_matrix( FILE *f, char *out, int tri, int np[], double *mat ) { double *p = mat; int i, j, m, n; m = np[0]; n = np[1]; for ( i = 0; i < m; i++ ) { int l = tri ? i+1 : n; for ( j = 0; j < l; j++ ) { if ( j ) fputc( ' ', f ); fprintf( f, out, p[j] ); } fputc( '\n', f ); p += n; } } void on_fpe( int sig ) { error( 2, "floating point error", 0 ); } /*-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-*/ void v_zero( double *v, int n ) { while ( n-- ) *v++ = 0.; } void v_add( double *v, int n, double *u ) { while ( n-- > 0 ) *v++ += *u++; } void v_sub( double *v, int n, double *u ) { while ( n-- > 0 ) *v++ -= *u++; } void v_divide( double *v, int n, double k ) { while ( n-- ) *v++ /= k; } void m_zero( double *mat, int m, int n ) { bzero( mat, m * n * sizeof( double )); } void m_divide( double *mat, int m, int n, double k ) { double *p = mat; int i; for ( i = 0; i < m; i++ ) { v_divide( p, n, k ); p += n; } } void add_m_prod( double *mat, int n, double *u ) { double *p = mat; int i, j; for ( i = 0; i < n; i++ ) { for ( j = 0; j <= i; j++ ) p[j] += u[i] * u[j]; p += n; } } void sum_rows( double *mat, int m, int n, double s[] ) { double *p = mat; int i; v_zero( s, n ); for ( i = 0; i < m; i++ ) { v_add( s, n, p ); p += n; } } void sub_rows( double *mat, int m, int n, double v[] ) { double *p = mat; int i; for ( i = 0; i < m; i++ ) { v_sub( p, n, v ); p += n; } } void center_rows( double *mat, int m, int n ) { double *mean; mean = alloca( n * sizeof( double )); sum_rows( mat , m, n, mean ); v_divide( mean, n, (double)( m )); sub_rows( mat, m, n, mean ); } void makesym( double *mat, int n ) { double *rp, *cp; int i, j; for ( i = 1; i < n; i++ ) { rp = mat + i * n; /* pointer to i-th row */ cp = mat + i; /* pointer to i-th column */ for ( j = 0; j < i; j++ ) { *cp = *rp; rp++; cp += n; } } } void covariance( double *mat, int m, int n, int tri, double *cov ) { double *p = mat; int i; m_zero( cov, n, n ); for ( i = 0; i < m; i++ ) { add_m_prod( cov, n, p ); p += n; } if ( !tri ) makesym( cov, n ); m_divide( cov, n, n, (double)( m )); } void cov_to_cor( double *mat, int n, int tri ) { double *p, *q, d; int i, j; for ( i = 1; i < n; i++ ) { p = mat + i * n; q = mat; for ( j = 0; j < i; j++ ) { if (( d = p[i] * ( *q )) != 0. ) p[j] /= sqrt( d ); q += n + 1; } } for ( q = mat, i = 0; i < n; q += n+1, i++ ) *q = 1.; if ( !tri ) makesym( mat, n ); } /* end of file */