Initial commit

main
lib 2 years ago
commit f0eba24f89
  1. 31
      CMakeLists.txt
  2. 0
      DEPENDENCIES
  3. 85
      GKlib.h
  4. 152
      GKlibSystem.cmake
  5. 18
      LICENSE.txt
  6. 87
      Makefile
  7. 54
      README.md
  8. 12
      SETUP
  9. 95
      b64.c
  10. 37
      blas.c
  11. 126
      cache.c
  12. 5
      conf/check_thread_storage.c
  13. 3378
      csr.c
  14. 214
      error.c
  15. 132
      evaluate.c
  16. 142
      fkvkselect.c
  17. 225
      fs.c
  18. 855
      getopt.c
  19. 70
      gk_arch.h
  20. 87
      gk_defs.h
  21. 25
      gk_externs.h
  22. 64
      gk_getopt.h
  23. 169
      gk_macros.h
  24. 203
      gk_mkblas.h
  25. 142
      gk_mkmemory.h
  26. 440
      gk_mkpqueue.h
  27. 215
      gk_mkpqueue2.h
  28. 123
      gk_mkrandom.h
  29. 271
      gk_mksort.h
  30. 40
      gk_mkutils.h
  31. 301
      gk_ms_inttypes.h
  32. 22
      gk_ms_stat.h
  33. 222
      gk_ms_stdint.h
  34. 426
      gk_proto.h
  35. 296
      gk_struct.h
  36. 38
      gk_types.h
  37. 107
      gk_util.c
  38. 10704
      gkregex.c
  39. 556
      gkregex.h
  40. 1940
      graph.c
  41. 247
      htable.c
  42. 681
      io.c
  43. 210
      itemsets.c
  44. 393
      mcore.c
  45. 307
      memory.c
  46. 25
      pqueue.c
  47. 136
      random.c
  48. 103
      rw.c
  49. 53
      scripts/gexpand.pl
  50. 174
      seq.c
  51. 437
      sort.c
  52. 530
      string.c
  53. 19
      test/CMakeLists.txt
  54. 301
      test/cmpnbrs.c
  55. 397
      test/csrcnv.c
  56. 286
      test/fis.c
  57. 845
      test/gkgraph.c
  58. 346
      test/gksort.c
  59. 268
      test/gkuniq.c
  60. 256
      test/grKx.c
  61. 304
      test/m2mnbrs.c
  62. 306
      test/rw.c
  63. 98
      test/splatt2svd.c
  64. 82
      test/strings.c
  65. 52
      timers.c
  66. 77
      tokenizer.c
  67. 11
      win32/adapt.c
  68. 14
      win32/adapt.h

@ -0,0 +1,31 @@
# Top-level build description for GKlib: builds the library, descends into
# test/, and installs the library together with its headers.
cmake_minimum_required(VERSION 2.8)
project(GKlib C)

# Static library by default; pass -DBUILD_SHARED_LIBS=ON for a shared one.
option(BUILD_SHARED_LIBS "Build shared libraries (.dll/.so) instead of static ones (.lib/.a)" OFF)

# GKLIB_PATH is read by GKlibSystem.cmake to locate the sources, the headers
# and the thread-local-storage probe.
get_filename_component(abs "." ABSOLUTE)
set(GKLIB_PATH ${abs})
unset(abs)

# Defines the compiler flags as well as GKlib_sources and GKlib_includes.
include(GKlibSystem.cmake)

include_directories(".")
if(MSVC)
  include_directories("win32")
  # Collect the sources that live in win32/.  The previous form
  # (RELATIVE "win32" "*.c") globbed the top-level *.c files instead.
  file(GLOB win32_sources "${CMAKE_CURRENT_SOURCE_DIR}/win32/*.c")
else()
  # No comma after the variable name: `set(win32_sources, "")` would define
  # a variable literally called "win32_sources,".
  set(win32_sources "")
endif()

add_library(GKlib ${GKlib_sources} ${win32_sources})
if(UNIX)
  # The library uses <math.h> routines.
  target_link_libraries(GKlib m)
endif()

include_directories("test")
add_subdirectory("test")

install(TARGETS GKlib
  ARCHIVE DESTINATION lib/${LINSTALL_PATH}
  LIBRARY DESTINATION lib/${LINSTALL_PATH})
install(FILES ${GKlib_includes} DESTINATION include/${HINSTALL_PATH})

@ -0,0 +1,85 @@
/*
* GKlib.h
*
* George's library of most frequently used routines
*
* $Id: GKlib.h 14866 2013-08-03 16:40:04Z karypis $
*
*/
#ifndef _GKLIB_H_
#define _GKLIB_H_ 1
#define GKMSPACE
#if defined(_MSC_VER)
#define __MSC__
#endif
#if defined(__ICC)
#define __ICC__
#endif
#include "gk_arch.h" /*!< This should be here, prior to the includes */
/*************************************************************************
* Header file inclusion section
**************************************************************************/
#include <stddef.h>
#include <stdlib.h>
#include <stdarg.h>
#include <stdio.h>
#include <memory.h>
#include <errno.h>
#include <ctype.h>
#include <math.h>
#include <float.h>
#include <time.h>
#include <string.h>
#include <limits.h>
#include <signal.h>
#include <setjmp.h>
#include <assert.h>
#include <sys/stat.h>
#if defined(__WITHPCRE__)
#include <pcreposix.h>
#else
#if defined(USE_GKREGEX)
#include "gkregex.h"
#else
#include <regex.h>
#endif /* defined(USE_GKREGEX) */
#endif /* defined(__WITHPCRE__) */
#if defined(__OPENMP__)
#include <omp.h>
#endif
#include <gk_types.h>
#include <gk_struct.h>
#include <gk_externs.h>
#include <gk_defs.h>
#include <gk_macros.h>
#include <gk_getopt.h>
#include <gk_mksort.h>
#include <gk_mkblas.h>
#include <gk_mkmemory.h>
#include <gk_mkpqueue.h>
#include <gk_mkpqueue2.h>
#include <gk_mkrandom.h>
#include <gk_mkutils.h>
#include <gk_proto.h>
#endif /* GKlib.h */

@ -0,0 +1,152 @@
# Helper modules.
include(CheckFunctionExists)
include(CheckIncludeFile)
# Build-configuration switches.  Every one defaults to OFF; they are consulted
# further down in this file when the compiler flags are assembled.
option(GDB      "enable use of GDB"      OFF)
option(ASSERT   "turn asserts on"        OFF)
option(ASSERT2  "additional assertions"  OFF)
option(DEBUG    "add debugging support"  OFF)
option(GPROF    "add gprof support"      OFF)
option(VALGRIND "add valgrind support"   OFF)
option(OPENMP   "enable OpenMP support"  OFF)
option(PCRE     "enable PCRE support"    OFF)
option(GKREGEX  "enable GKREGEX support" OFF)
option(GKRAND   "enable GKRAND support"  OFF)
option(NO_X86   "enable NO_X86 support"  OFF)
# Add compiler flags.
# GKlib_COPTIONS accumulates definitions, warning and architecture flags;
# GKlib_COPTS holds the optimisation/debug flags.  Both end up in
# CMAKE_C_FLAGS at the bottom of this file.
if(MSVC)
  set(GKlib_COPTS "/Ox")
  set(GKlib_COPTIONS "-DWIN32 -DMSC -D_CRT_SECURE_NO_DEPRECATE -DUSE_GKREGEX")
elseif(MINGW)
  # The define belongs in GKlib_COPTIONS: GKlib_COPTS is replaced wholesale by
  # the GDB/DEBUG/GPROF handling below, which used to drop -DUSE_GKREGEX.
  set(GKlib_COPTIONS "-DUSE_GKREGEX")
else()
  set(GKlib_COPTIONS "-DLINUX -D_FILE_OFFSET_BITS=64")
endif()

if(CYGWIN)
  set(GKlib_COPTIONS "${GKlib_COPTIONS} -DCYGWIN")
endif()

if(CMAKE_COMPILER_IS_GNUCC)
  # GCC opts.
  set(GKlib_COPTIONS "${GKlib_COPTIONS} -std=c99 -fno-strict-aliasing")
  if(VALGRIND)
    # Valgrind builds target a generic x86-64 baseline instead of the host CPU.
    # Append to GKlib_COPTIONS (the old code expanded the undefined
    # GK_COPTIONS and thereby threw away every flag collected so far).
    set(GKlib_COPTIONS "${GKlib_COPTIONS} -march=x86-64 -mtune=generic")
  else()
    # -march=native is not a valid flag on PPC:
    if(CMAKE_SYSTEM_PROCESSOR MATCHES "power|ppc|powerpc|ppc64|powerpc64" OR (APPLE AND CMAKE_OSX_ARCHITECTURES MATCHES "ppc|ppc64"))
      set(GKlib_COPTIONS "${GKlib_COPTIONS} -mtune=native")
    else()
      set(GKlib_COPTIONS "${GKlib_COPTIONS} -march=native")
    endif()
  endif()
  if(NOT MINGW)
    set(GKlib_COPTIONS "${GKlib_COPTIONS} -fPIC")
  endif()
  # GCC warnings.
  set(GKlib_COPTIONS "${GKlib_COPTIONS} -Werror -Wall -pedantic -Wno-unused-function -Wno-unused-but-set-variable -Wno-unused-variable -Wno-unknown-pragmas -Wno-unused-label")
elseif(CMAKE_C_COMPILER_ID MATCHES "Sun")
  # Sun insists on -xc99.
  # (The variable is named, not expanded, so an empty compiler id cannot
  # turn the condition into a syntax error.)
  set(GKlib_COPTIONS "${GKlib_COPTIONS} -xc99")
endif()

# Intel compiler
if(CMAKE_C_COMPILER_ID MATCHES "Intel")
  set(GKlib_COPTIONS "${GKlib_COPTIONS} -xHost -std=c99")
endif()
# OpenMP is searched for only when the OPENMP option is on; a failed search
# produces a warning and leaves the flags untouched.
if(OPENMP)
  include(FindOpenMP)
  if(NOT OPENMP_FOUND)
    message(WARNING "OpenMP was requested but support was not found")
  else()
    set(GKlib_COPTIONS "${GKlib_COPTIONS} -D__OPENMP__ ${OpenMP_C_FLAGS}")
  endif()
endif()

# Forward the CPU type to the C code as the NO_X86 definition.
if(NO_X86)
  set(GKlib_COPTIONS "${GKlib_COPTIONS} -DNO_X86=${NO_X86}")
endif()
# Add various definitions.
if(GDB)
  set(GKlib_COPTS "${GKlib_COPTS} -g")
  set(GKlib_COPTIONS "${GKlib_COPTIONS} -Werror")
elseif(NOT MSVC)
  # Default optimisation level.  MSVC keeps the /Ox chosen above; -O3 is not
  # an option its compiler understands.
  set(GKlib_COPTS "-O3")
endif()

# DEBUG and GPROF each replace the optimisation flags set so far.
if(DEBUG)
  set(GKlib_COPTS "-g")
  set(GKlib_COPTIONS "${GKlib_COPTIONS} -DDEBUG")
endif()

if(GPROF)
  set(GKlib_COPTS "-pg")
endif()

# Assertions are compiled out unless explicitly requested.
if(NOT ASSERT)
  set(GKlib_COPTIONS "${GKlib_COPTIONS} -DNDEBUG")
endif()

if(NOT ASSERT2)
  set(GKlib_COPTIONS "${GKlib_COPTIONS} -DNDEBUG2")
endif()

# Add various options
if(PCRE)
  set(GKlib_COPTIONS "${GKlib_COPTIONS} -D__WITHPCRE__")
endif()

if(GKREGEX)
  set(GKlib_COPTIONS "${GKlib_COPTIONS} -DUSE_GKREGEX")
endif()

if(GKRAND)
  set(GKlib_COPTIONS "${GKlib_COPTIONS} -DUSE_GKRAND")
endif()
# Check for features.
check_include_file(execinfo.h HAVE_EXECINFO_H)
if(HAVE_EXECINFO_H)
  set(GKlib_COPTIONS "${GKlib_COPTIONS} -DHAVE_EXECINFO_H")
endif()

check_function_exists(getline HAVE_GETLINE)
if(HAVE_GETLINE)
  set(GKlib_COPTIONS "${GKlib_COPTIONS} -DHAVE_GETLINE")
endif()

# Custom check for TLS.
if(MSVC)
  set(GKlib_COPTIONS "${GKlib_COPTIONS} -D__thread=__declspec(thread)")

  # Run the probe only when no result is cached yet.  The former test compared
  # the variable's value against itself, which always matched, so the probe
  # was repeated on every configure run.
  if(NOT DEFINED HAVE_THREADLOCALSTORAGE)
    try_compile(HAVE_THREADLOCALSTORAGE
      ${CMAKE_BINARY_DIR}
      ${GKLIB_PATH}/conf/check_thread_storage.c)
    if(HAVE_THREADLOCALSTORAGE)
      message(STATUS "checking for thread-local storage - found")
    else()
      message(STATUS "checking for thread-local storage - not found")
    endif()
  endif()

  # Without TLS support the __thread keyword is defined away.
  if(NOT HAVE_THREADLOCALSTORAGE)
    set(GKlib_COPTIONS "${GKlib_COPTIONS} -D__thread=")
  endif()
endif()

# Finally set the official C flags.
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${GKlib_COPTIONS} ${GKlib_COPTS}")

# Find GKlib sources.
file(GLOB GKlib_sources ${GKLIB_PATH}/*.c)
file(GLOB GKlib_includes ${GKLIB_PATH}/*.h)

@ -0,0 +1,18 @@
Copyright & License Notice
---------------------------
Copyright 1995-2018, Regents of the University of Minnesota
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied. See the License for the specific language governing
permissions and limitations under the License.

@ -0,0 +1,87 @@
# Configuration options.
# Each one can be overridden on the command line (e.g. `make config cc=icc`).
# The value `not-set` means "do not pass this option to cmake".
cc = gcc
prefix = ~/local
openmp = not-set
gdb = not-set
assert = not-set
assert2 = not-set
debug = not-set
gprof = not-set
valgrind = not-set
pcre = not-set
gkregex = not-set
gkrand = not-set

# Basically proxies everything to the builddir cmake.
cputype = $(shell uname -m | sed "s/\\ /_/g")
systype = $(shell uname -s)

BUILDDIR = build/$(systype)-$(cputype)

# Process configuration options.
CONFIG_FLAGS = -DCMAKE_VERBOSE_MAKEFILE=1
ifneq ($(gdb), not-set)
    CONFIG_FLAGS += -DGDB=$(gdb)
endif
ifneq ($(assert), not-set)
    CONFIG_FLAGS += -DASSERT=$(assert)
endif
ifneq ($(assert2), not-set)
    CONFIG_FLAGS += -DASSERT2=$(assert2)
endif
ifneq ($(debug), not-set)
    CONFIG_FLAGS += -DDEBUG=$(debug)
endif
ifneq ($(gprof), not-set)
    CONFIG_FLAGS += -DGPROF=$(gprof)
endif
ifneq ($(valgrind), not-set)
    CONFIG_FLAGS += -DVALGRIND=$(valgrind)
endif
ifneq ($(openmp), not-set)
    CONFIG_FLAGS += -DOPENMP=$(openmp)
endif
ifneq ($(pcre), not-set)
    CONFIG_FLAGS += -DPCRE=$(pcre)
endif
# gkregex and gkrand forward their own values (they used to forward $(pcre)).
ifneq ($(gkregex), not-set)
    CONFIG_FLAGS += -DGKREGEX=$(gkregex)
endif
ifneq ($(gkrand), not-set)
    CONFIG_FLAGS += -DGKRAND=$(gkrand)
endif
ifneq ($(prefix), not-set)
    CONFIG_FLAGS += -DCMAKE_INSTALL_PREFIX=$(prefix)
endif
ifneq ($(cc), not-set)
    CONFIG_FLAGS += -DCMAKE_C_COMPILER=$(cc)
endif
# Any machine type other than x86_64 is reported to cmake through NO_X86.
ifneq ($(cputype), x86_64)
    CONFIG_FLAGS += -DNO_X86=$(cputype)
endif

# Creates the build directory and runs cmake inside it.
define run-config
mkdir -p $(BUILDDIR)
cd $(BUILDDIR) && cmake $(CURDIR) $(CONFIG_FLAGS)
endef

# Build targets are delegated to the makefile generated by cmake.  $(MAKE) is
# used so that command-line flags and the job server reach the sub-make.
all clean install: $(BUILDDIR)
	$(MAKE) -C $(BUILDDIR) $@

uninstall:
	xargs rm < $(BUILDDIR)/install_manifest.txt

# Configure on demand when the build directory does not exist yet.
$(BUILDDIR):
	$(run-config)

# Reconfigure from scratch.
config: distclean
	$(run-config)

distclean:
	rm -rf $(BUILDDIR)

# Touch every CMakeLists.txt so that the next build re-runs cmake.
remake:
	find . -name CMakeLists.txt -exec touch {} ';'

.PHONY: config distclean all clean install uninstall remake

@ -0,0 +1,54 @@
# GKlib
A library of various helper routines and frameworks used by many of the lab's software packages.
## Build requirements
- CMake 2.8, found at http://www.cmake.org/, as well as GNU make.
Assuming that the above are available, two commands should suffice to
build the software:
```
make config
make
```
## Configuring the build
It is primarily configured by passing options to make config. For example:
```
make config cc=icc
```
would configure it to be built using icc.
Configuration options are:
```
cc=[compiler] - The C compiler to use [default: gcc]
prefix=[PATH] - Set the installation prefix [default: ~/local]
openmp=set - To build a version with OpenMP support
```
## Building and installing
To build and install, run the following
```
make
make install
```
By default, the library file, header file, and binaries will be installed in
```
~/local/lib
~/local/include
~/local/bin
```
## Other make commands
make uninstall
Removes all files installed by 'make install'.
make clean
Removes all object files but retains the configuration options.
make distclean
Performs clean and completely removes the build directory.

12
SETUP

@ -0,0 +1,12 @@
#!/usr/bin/env bash
# Configures, builds and installs the library under the given prefix.
# Usage: setup.sh install_directory [dependencies]
if [ -z "$1" ]
then
    printf "Usage: setup.sh install_directory [dependencies].\n" 1>&2
    exit 1
fi

# Work from the directory that holds this script.  The path is quoted so that
# directories containing spaces work, and the script stops if it cannot get
# there instead of running make in the wrong place.
cd "$(dirname "$0")" || exit 1

# Do not attempt the install when the configuration step failed.
make config shared=1 prefix="$1" || exit 1
make install

95
b64.c

@ -0,0 +1,95 @@
/*!
\file b64.c
\brief This file contains some simple 8bit-to-6bit encoding/decoding routines
Most of these routines are outdated and should be converted using glibc's equivalent
routines.
\date Started 2/22/05
\author George
\version\verbatim $Id: b64.c 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
\verbatim
$Copyright$
$License$
\endverbatim
*/
#include "GKlib.h"
#define B64OFFSET 48 /* This is the '0' number */

/******************************************************************************
* Packs the 24 bits held in in[0..2] into four 6-bit groups, most significant
* bits first, and stores each group plus B64OFFSET in out[0..3].
*******************************************************************************/
void encodeblock(unsigned char *in, unsigned char *out)
{
  int k;

  /* split the three bytes into four 6-bit values */
  out[0] = in[0] >> 2;
  out[1] = ((in[0] & 0x03) << 4) | (in[1] >> 4);
  out[2] = ((in[1] & 0x0f) << 2) | (in[2] >> 6);
  out[3] = in[2] & 0x3f;

  /* shift every value into the printable range that starts at '0' */
  for (k=0; k<4; k++)
    out[k] += B64OFFSET;
}
/******************************************************************************
* Decode 4 '6-bit' characters into 3 '8-bit' binary bytes
*
* in[0..3] hold the encoded characters (6-bit value + B64OFFSET) and
* out[0..2] receive the decoded bytes.  The input characters are left
* untouched: the offset is removed on local copies, so the same encoded
* buffer can be decoded more than once.
*******************************************************************************/
void decodeblock(unsigned char *in, unsigned char *out)
{
  /* strip the offset; the arithmetic wraps modulo 256 like the stored bytes */
  unsigned char c0 = (unsigned char)(in[0] - B64OFFSET);
  unsigned char c1 = (unsigned char)(in[1] - B64OFFSET);
  unsigned char c2 = (unsigned char)(in[2] - B64OFFSET);
  unsigned char c3 = (unsigned char)(in[3] - B64OFFSET);

  /* reassemble the three bytes from the four 6-bit groups */
  out[0] = (c0 << 2 | c1 >> 4);
  out[1] = (c1 << 4 | c2 >> 2);
  out[2] = (c2 << 6 | c3);
}
/******************************************************************************
* This function encodes an input array of bytes into a base64 encoding. Memory
* for the output array is assumed to have been allocated by the calling program
* and be sufficiently large. The output string is NULL terminated.
*
* nbytes    - number of input bytes; must be a multiple of 3
* inbuffer  - the bytes to encode
* outbuffer - receives 4 characters for every 3 input bytes, followed by '\0'
*
* An invalid nbytes is reported through gk_errexit().  If that call returns,
* an empty string is stored in outbuffer and nothing is encoded, because
* encoding a partial trailing block would read past the end of inbuffer.
*******************************************************************************/
void GKEncodeBase64(int nbytes, unsigned char *inbuffer, unsigned char *outbuffer)
{
  int i, j;

  if (nbytes%3 != 0) {
    gk_errexit(SIGERR, "GKEncodeBase64: Input buffer size should be a multiple of 3! (%d)\n", nbytes);
    outbuffer[0] = '\0';
    return;
  }

  for (j=0, i=0; i<nbytes; i+=3, j+=4)
    encodeblock(inbuffer+i, outbuffer+j);

  outbuffer[j] = '\0';
}
/******************************************************************************
* This function decodes an input array of base64 characters into their actual
* 8-bit codes. Memory for the output array is assumed to have been allocated
* by the calling program and be sufficiently large. The padding is discarded.
*
* nbytes    - number of encoded characters; must be a multiple of 4
* inbuffer  - the encoded characters
* outbuffer - receives 3 bytes for every 4 input characters
*
* An invalid nbytes is reported through gk_errexit().  If that call returns,
* nothing is decoded, because decoding a partial trailing block would read
* past the end of inbuffer.
*******************************************************************************/
void GKDecodeBase64(int nbytes, unsigned char *inbuffer, unsigned char *outbuffer)
{
  int i, j;

  if (nbytes%4 != 0) {
    gk_errexit(SIGERR, "GKDecodeBase64: Input buffer size should be a multiple of 4! (%d)\n", nbytes);
    return;
  }

  for (j=0, i=0; i<nbytes; i+=4, j+=3)
    decodeblock(inbuffer+i, outbuffer+j);
}

@ -0,0 +1,37 @@
/*!
\file blas.c
\brief This file contains GKlib's implementation of BLAS-like routines
The BLAS routines that are currently implemented are mostly level-one.
They follow a naming convention of the type gk_[type][name], where
[type] is one of c, i, f, and d, based on C's four standard scalar
datatypes of characters, integers, floats, and doubles.
These routines are implemented using a generic macro template,
which is used for code generation.
\date Started 9/28/95
\author George
\version\verbatim $Id: blas.c 14330 2013-05-18 12:15:15Z karypis $ \endverbatim
*/
#include <GKlib.h>
/*************************************************************************/
/*! Use the templates to generate BLAS routines for the scalar data types.
    Each line below expands the GK_MKBLAS template once.  The first argument
    is the name prefix of the generated routines and the second is the
    element type they operate on.
    NOTE(review): the third argument looks like the type used for results
    or accumulators -- confirm against the macro definition. */
/*************************************************************************/
GK_MKBLAS(gk_c, char, int)
GK_MKBLAS(gk_i, int, int)
GK_MKBLAS(gk_i32, int32_t, int32_t)
GK_MKBLAS(gk_i64, int64_t, int64_t)
GK_MKBLAS(gk_z, ssize_t, ssize_t)
GK_MKBLAS(gk_zu, size_t, size_t)
GK_MKBLAS(gk_f, float, float)
GK_MKBLAS(gk_d, double, double)
GK_MKBLAS(gk_idx, gk_idx_t, gk_idx_t)

@ -0,0 +1,126 @@
/*!
\file
\brief Functions dealing with simulating cache behavior for performance
modeling and analysis;
\date Started 4/13/18
\author George
\author Copyright 1997-2011, Regents of the University of Minnesota
\version $Id: cache.c 21991 2018-04-16 03:08:12Z karypis $
*/
#include <GKlib.h>
/*************************************************************************/
/*! This function creates a cache

    \param nway   is the number of slots examined by every lookup.
    \param lnbits is the number of low-order address bits that are dropped
                  before an address is looked up.
    \param cnbits is the base-2 logarithm of the cache size; the cache gets
                  2^cnbits index positions.
    \returns the newly allocated cache with all counters and slots zeroed.
*/
/*************************************************************************/
gk_cache_t *gk_cacheCreate(uint32_t nway, uint32_t lnbits, size_t cnbits)
{
  gk_cache_t *cache;

  cache = (gk_cache_t *)gk_malloc(sizeof(gk_cache_t), "gk_cacheCreate: cache");
  memset(cache, 0, sizeof(gk_cache_t));

  cache->nway   = nway;
  cache->lnbits = lnbits;
  cache->cnbits = cnbits;

  /* shift a size_t one: a plain int 1 overflows once cnbits reaches 31 */
  cache->csize  = ((size_t)1)<<cnbits;
  cache->cmask  = cache->csize-1;

  cache->latimes = gk_ui64smalloc(cache->csize*nway, 0, "gk_cacheCreate: latimes");
  cache->clines  = gk_zusmalloc(cache->csize*nway, 0, "gk_cacheCreate: clines");

  return cache;
}
/*************************************************************************/
/*! This function resets a cache

    It clears the hit and miss counters, the access clock, and every stored
    line together with its last-access time.  The clock has to restart as
    well: gk_cacheGetHitRate() divides the hit count by the clock, so a stale
    clock would understate the hit rate measured after the reset.
*/
/*************************************************************************/
void gk_cacheReset(gk_cache_t *cache)
{
  cache->nhits   = 0;
  cache->nmisses = 0;
  cache->clock   = 0;

  gk_ui64set(cache->csize*cache->nway, 0, cache->latimes);
  gk_zuset(cache->csize*cache->nway, 0, cache->clines);

  return;
}
/*************************************************************************/
/*! Releases a cache and the two arrays it owns, then stores NULL in the
    caller's pointer.  Passing a pointer to a NULL cache is a no-op.

    \param r_cache is the address of the cache pointer to release.
*/
/*************************************************************************/
void gk_cacheDestroy(gk_cache_t **r_cache)
{
  gk_cache_t *victim = *r_cache;

  if (victim != NULL) {
    gk_free((void **)&victim->clines, &victim->latimes, &victim, LTERM);
    *r_cache = NULL;
  }
}
/*************************************************************************/
/*! Simulates one load of address addr and updates the cache statistics.

    The address is reduced to a line number by dropping its low lnbits bits.
    The nway slots starting at position (line & cmask) are searched for that
    line.  A match counts as a hit; otherwise the first empty slot, or else
    the slot with the oldest access time, is overwritten and a miss is
    counted.  The clock advances by one on every call.

    \returns 1 in all cases, hit or miss.

    NOTE(review): the slot window starts at (line & cmask) without being
    scaled by nway, so windows of neighbouring index values overlap --
    confirm this is the intended model.
    NOTE(review): a stored value of 0 marks an empty slot, so line number 0
    cannot be told apart from "empty".
*/
/*************************************************************************/
int gk_cacheLoad(gk_cache_t *cache, size_t addr)
{
  uint32_t way, nway=cache->nway;
  size_t victim=0;
  size_t *slots;
  uint64_t *stamps;

  addr = addr>>(cache->lnbits);

  slots  = cache->clines  + (addr&(cache->cmask));
  stamps = cache->latimes + (addr&(cache->cmask));

  cache->clock++;

  /* hit: refresh the access time and finish */
  for (way=0; way<nway; way++) {
    if (slots[way] == addr) {
      cache->nhits++;
      stamps[way] = cache->clock;
      return 1;
    }
  }

  /* miss: take the first empty slot, otherwise the least recently used one */
  for (way=0; way<nway; way++) {
    if (slots[way] == 0) {
      victim = way;
      break;
    }
    if (stamps[way] < stamps[victim])
      victim = way;
  }

  cache->nmisses++;
  slots[victim]  = addr;
  stamps[victim] = cache->clock;

  return 1;
}
/*************************************************************************/
/*! Returns the fraction of simulated loads that were hits, computed as
    nhits/(clock+1).  The +1 keeps the divisor non-zero for a cache that has
    not been accessed yet.
*/
/*************************************************************************/
double gk_cacheGetHitRate(gk_cache_t *cache)
{
  double hits     = (double)cache->nhits;
  double accesses = (double)(cache->clock+1);

  return hits/accesses;
}

@ -0,0 +1,5 @@
/* Compile probe: this file is handed to the build system's try_compile step.
   It builds only if the compiler accepts the __thread storage-class keyword
   used in the declaration below; nothing in it is meant to be run. */
extern __thread int x;
int main(int argc, char **argv) {
return 0;
}

3378
csr.c

File diff suppressed because it is too large Load Diff

@ -0,0 +1,214 @@
/*!
\file error.c
\brief Various error-handling functions
This file contains functions dealing with error reporting and termination
\author George
\date 1/1/2007
\version\verbatim $Id: error.c 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
*/
#define _GK_ERROR_C_ /* this is needed to properly declare the gk_jbuf* variables
as extern in GKlib.h */
#include <GKlib.h>
/* These are the jmp_buf for the graceful exit in case of severe errors.
Multiple buffers are defined to allow for recursive invocation. */
#define MAX_JBUFS 128
/* Index of the most recently trapped entry in gk_jbufs; -1 when no trap is
active (see gk_sigtrap/gk_siguntrap). */
__thread int gk_cur_jbufs=-1;
__thread jmp_buf gk_jbufs[MAX_JBUFS];
/* Single jump buffer used by gk_NonLocalExit_Handler. */
__thread jmp_buf gk_jbuf;
typedef void (*gksighandler_t)(int);
/* These are the holders of the old signal handlers for the trapped signals */
static __thread gksighandler_t old_SIGMEM_handler; /* Custom signal */
static __thread gksighandler_t old_SIGERR_handler; /* Custom signal */
static __thread gksighandler_t old_SIGMEM_handlers[MAX_JBUFS]; /* Custom signal */
static __thread gksighandler_t old_SIGERR_handlers[MAX_JBUFS]; /* Custom signal */
/* The following is used to control if the gk_errexit() will actually abort or not.
There is always a single copy of this variable (it is not thread-local). */
static int gk_exit_on_error = 1;
/*************************************************************************/
/*! Sets the flag consulted by errexit() and gk_errexit().

    \param value is the new flag; zero makes those functions return after
           printing their message instead of exiting or raising a signal.
*/
/*************************************************************************/
void gk_set_exit_on_error(int value)
{
  gk_exit_on_error = value;
}
/*************************************************************************/
/*! Prints a printf-style message to stderr, appends a newline when the
    format string does not already end with one, and then calls exit(-2)
    unless exiting has been disabled through gk_set_exit_on_error().

    \param f_str is the printf-style format string; the remaining arguments
           are its values.
*/
/*************************************************************************/
void errexit(char *f_str,...)
{
  va_list ap;
  size_t len;

  va_start(ap, f_str);
  vfprintf(stderr, f_str, ap);
  va_end(ap);

  len = strlen(f_str);
  if (len == 0 || f_str[len-1] != '\n')
    fprintf(stderr,"\n");
  fflush(stderr);

  if (gk_exit_on_error)
    exit(-2);
}
/*************************************************************************/
/*! Prints a printf-style message followed by a newline to stderr and then
    raises the given signal, unless that has been disabled through
    gk_set_exit_on_error().

    \param signum is the signal to raise.
    \param f_str is the printf-style format string; the remaining arguments
           are its values.
*/
/*************************************************************************/
void gk_errexit(int signum, char *f_str,...)
{
  va_list ap;

  va_start(ap, f_str);
  vfprintf(stderr, f_str, ap);
  va_end(ap);

  fprintf(stderr,"\n");
  fflush(stderr);

  if (!gk_exit_on_error)
    return;

  raise(signum);
}
/***************************************************************************/
/*! Opens a new trap level: advances gk_cur_jbufs, installs gk_sigthrow as
    the handler for SIGMEM and SIGERR, and remembers the handlers that were
    active before so that gk_siguntrap() can put them back.

    \returns 1 on success, 0 when all MAX_JBUFS levels are already in use.
*/
/***************************************************************************/
int gk_sigtrap()
{
  int level = gk_cur_jbufs+1;

  if (level >= MAX_JBUFS)
    return 0;

  gk_cur_jbufs = level;

  old_SIGMEM_handlers[level] = signal(SIGMEM, gk_sigthrow);
  old_SIGERR_handlers[level] = signal(SIGERR, gk_sigthrow);

  return 1;
}
/***************************************************************************/
/*! Closes the current trap level: reinstalls the SIGMEM and SIGERR handlers
    that were saved by the matching gk_sigtrap() and steps gk_cur_jbufs back.

    \returns 1 on success, 0 when no trap level is open.
*/
/***************************************************************************/
int gk_siguntrap()
{
  if (gk_cur_jbufs != -1) {
    signal(SIGMEM, old_SIGMEM_handlers[gk_cur_jbufs]);
    signal(SIGERR, old_SIGERR_handlers[gk_cur_jbufs]);

    gk_cur_jbufs--;

    return 1;
  }

  return 0;
}
/*************************************************************************/
/*! Custom signal handler installed by gk_sigtrap().  It performs a longjmp
    to the jump buffer of the current trap level, passing the signal number
    as the longjmp value.

    \param signum is the signal that was delivered.
*/
/*************************************************************************/
void gk_sigthrow(int signum)
{
  longjmp(gk_jbufs[gk_cur_jbufs], signum);
}
/***************************************************************************
* This function sets a number of signal handlers and sets the return point
* of a longjmp
****************************************************************************/
void gk_SetSignalHandlers()
{
old_SIGMEM_handler = signal(SIGMEM, gk_NonLocalExit_Handler);
old_SIGERR_handler = signal(SIGERR, gk_NonLocalExit_Handler);
}
/***************************************************************************
* Restores the SIGMEM and SIGERR handlers that gk_SetSignalHandlers() saved.
****************************************************************************/
void gk_UnsetSignalHandlers()
{
  (void)signal(SIGMEM, old_SIGMEM_handler);
  (void)signal(SIGERR, old_SIGERR_handler);
}
/*************************************************************************
* Signal handler installed by gk_SetSignalHandlers() for SIGMEM and SIGERR.
* It leaves the handler through a longjmp to gk_jbuf, passing the signal
* number as the longjmp value.
**************************************************************************/
void gk_NonLocalExit_Handler(int signum)
{
  longjmp(gk_jbuf, signum);
}
/*************************************************************************/
/*! \brief Thread-safe implementation of strerror()

    On WIN32, MinGW and SUNOS builds this simply forwards to strerror().
    Elsewhere the message is written by strerror_r() into a per-thread
    buffer of 1024 bytes, which is returned; a later call from the same
    thread overwrites it.

    NOTE(review): the return value of strerror_r() is ignored.  Variants of
    strerror_r() that return the message pointer instead of filling the
    buffer would leave buf unchanged -- confirm which variant the build
    selects.
*/
/**************************************************************************/
char *gk_strerror(int errnum)
{
#if defined(WIN32) || defined(__MINGW32__) || defined(SUNOS)
  return strerror(errnum);
#else
  static __thread char buf[1024];

  strerror_r(errnum, buf, sizeof(buf));
  buf[sizeof(buf)-1] = '\0';

  return buf;
#endif
}
/*************************************************************************
* This function prints a backtrace of calling functions
*
* At most 10 frames are reported, one per line on stdout, after a header
* line with the number of frames.  When HAVE_EXECINFO_H is not defined the
* function does nothing.
**************************************************************************/
void PrintBackTrace()
{
#ifdef HAVE_EXECINFO_H
  void *array[10];
  int i, size;
  char **strings;

  size = backtrace(array, 10);
  strings = backtrace_symbols(array, size);

  printf("Obtained %d stack frames.\n", size);

  /* backtrace_symbols() returns NULL when it cannot allocate the table */
  if (strings == NULL)
    return;

  for (i=0; i<size; i++) {
    printf("%s\n", strings[i]);
  }

  free(strings);
#endif
}

@ -0,0 +1,132 @@
/*!
\file evaluate.c
\brief Various routines to evaluate classification performance
\author George
\date 9/23/2008
\version\verbatim $Id: evaluate.c 13328 2012-12-31 14:57:40Z karypis $ \endverbatim
*/
#include <GKlib.h>
/**********************************************************************
* Returns the best accuracy (in percent) obtainable by cutting a ranked
* list at some position, given +1/-1 class labels in list[].val.
*
* For every prefix of the list the items inside the prefix are treated as
* predicted positive and the rest as predicted negative; the largest
* resulting accuracy is returned.  An empty list yields 0.
**********************************************************************/
float ComputeAccuracy(int n, gk_fkv_t *list)
{
  int i, npos=0, nneg, hits=0, misses=0;
  float best=0.0, acc;

  /* class totals */
  for (i=0; i<n; i++) {
    if (list[i].val == 1)
      npos++;
  }
  nneg = n - npos;

  /* sweep the cut position down the ranking */
  for (i=0; i<n; i++) {
    if (list[i].val == 1)
      hits++;
    else
      misses++;

    acc = (hits + nneg - misses) * 100.0/ (npos + nneg) ;
    if (acc > best)
      best = acc;
  }

  return best;
}
/*****************************************************************************
* This function computes the ROC score of a ranked list, given a +1/-1 class
* list.
*
* n    - number of entries in list
* maxN - the scan stops once this many negative entries have been seen
* list - ranked entries; .key is the score and .val the class (+1 positive)
*
* Entries with equal keys are treated as one step of the curve.  Returns 0
* for an empty list or when no positive or no negative entry was scanned.
*
* NOTE(review): the area is accumulated in an int and each trapezoid is
* halved with integer division, so a step that contains an odd number of
* positives and an odd number of negatives loses 0.5 -- confirm whether the
* truncation is intended.
******************************************************************************/
float ComputeROCn(int n, int maxN, gk_fkv_t *list)
{
  int i, P, TP, FP, TPprev, FPprev, AUC;
  float prev;

  /* nothing to rank: do not touch list[0] of an empty list */
  if (n <= 0)
    return 0.0;

  FP = TP = FPprev = TPprev = AUC = 0;
  prev = list[0].key -1;  /* differs from the first key, forcing a first step */

  for (P=0, i=0; i<n; i++)
    P += (list[i].val == 1 ? 1 : 0);

  for (i=0; i<n && FP < maxN; i++) {
    if (list[i].key != prev) {
      AUC += (TP+TPprev)*(FP-FPprev)/2;
      prev = list[i].key;
      FPprev = FP;
      TPprev = TP;
    }
    if (list[i].val == 1)
      TP++;
    else {
      FP++;
    }
  }
  AUC += (TP+TPprev)*(FP-FPprev)/2;

  return (TP*FP > 0 ? (float)(1.0*AUC/(P*FP)) : 0.0);
}
/*****************************************************************************
* Computes the median rate of false positives: the list is scanned from the
* top until half of the positive entries (rounded up) have been seen, and
* the number of negatives met on the way is divided by the total number of
* negatives.
*
* NOTE(review): when the list holds no negative entry the division is 0/0
* in floating point -- confirm callers never pass such a list.
******************************************************************************/
float ComputeMedianRFP(int n, gk_fkv_t *list)
{
  int i, npos=0, nneg=0, seenpos=0, seenneg=0, target;

  /* class totals */
  for (i=0; i<n; i++) {
    if (list[i].val == 1)
      npos++;
    else
      nneg++;
  }

  /* walk down the ranking until the median positive is reached */
  target = (npos+1)/2;
  for (i=0; i<n && seenpos < target; i++) {
    if (list[i].val == 1)
      seenpos++;
    else
      seenneg++;
  }

  return 1.0*seenneg/nneg;
}
/*********************************************************
* Returns the arithmetic mean of values[0..n-1].  The sum
* is accumulated in single precision.
********************************************************/
float ComputeMean (int n, float *values)
{
  float sum = 0.0;
  int k = 0;

  while (k < n) {
    sum += values[k];
    k++;
  }

  return 1.0 * sum/ n;
}
/********************************************************
* Returns the population standard deviation of
* values[0..n-1]: the square root of the mean squared
* distance from ComputeMean(n, values).
********************************************************/
float ComputeStdDev(int n, float *values)
{
  float mean = ComputeMean(n, values);
  float sumsq = 0;
  int k = 0;

  while (k < n) {
    sumsq += (values[k] - mean)* (values[k] - mean);
    k++;
  }

  return sqrt(1.0 * sumsq/n);
}

@ -0,0 +1,142 @@
/*!
\file fkvkselect.c
\brief Sorts only the largest k values
\date Started 7/14/00
\author George
\version\verbatim $Id: fkvkselect.c 10711 2011-08-31 22:23:04Z karypis $\endverbatim
*/
#include <GKlib.h>
/* Swap two items by assignment, using stmp as the temporary. */
#define QSSWAP(a, b, stmp) do { stmp = (a); (a) = (b); (b) = stmp; } while (0)

/******************************************************************************/
/*! This function puts the 'topk' largest values in the beginning of the array.

    \param n    is the number of entries in cand.
    \param topk is the number of largest-key entries wanted.
    \param cand is the array that is rearranged in place; the leading
                entries are not sorted among themselves.
    \returns n when the array has no more than topk entries (nothing is
             moved in that case), topk otherwise.
*/
/*******************************************************************************/
int gk_dfkvkselect(size_t n, int topk, gk_fkv_t *cand)
{
  int i, j, lo, hi, mid;
  gk_fkv_t stmp;
  float pivot;

  if (n <= topk)
    return n; /* the array has fewer elements than we want */

  lo = 0;
  hi = n-1;
  while (lo < hi) {
    mid = lo + ((hi-lo) >> 1);

    /* choose the pivot position among lo, mid and hi */
    if (cand[lo].key < cand[mid].key)
      mid = lo;
    if (cand[hi].key > cand[mid].key) {
      mid = hi;
      if (cand[lo].key < cand[mid].key)
        mid = lo;
    }

    /* park the pivot at the end of the current range */
    QSSWAP(cand[mid], cand[hi], stmp);
    pivot = cand[hi].key;

    /* move every key >= pivot to the front of the range */
    i = lo;
    for (j=lo; j<hi; j++) {
      if (cand[j].key >= pivot) {
        QSSWAP(cand[i], cand[j], stmp);
        i++;
      }
    }
    /* drop the pivot right behind them */
    QSSWAP(cand[i], cand[hi], stmp);

    /* continue in the side that contains position topk */
    if (i > topk)
      hi = i-1;
    else if (i < topk)
      lo = i+1;
    else
      break;
  }

  return topk;
}
/******************************************************************************/
/*! This function puts the 'topk' smallest values in the beginning of the array.

    \param n    is the number of entries in cand.
    \param topk is the number of smallest-key entries wanted.
    \param cand is the array that is rearranged in place; the leading
                entries are not sorted among themselves.
    \returns n when the array has no more than topk entries (nothing is
             moved in that case), topk otherwise.
*/
/*******************************************************************************/
int gk_ifkvkselect(size_t n, int topk, gk_fkv_t *cand)
{
  int i, j, lo, hi, mid;
  gk_fkv_t stmp;
  float pivot;

  if (n <= topk)
    return n; /* the array has fewer elements than we want */

  lo = 0;
  hi = n-1;
  while (lo < hi) {
    mid = lo + ((hi-lo) >> 1);

    /* choose the pivot position among lo, mid and hi */
    if (cand[lo].key > cand[mid].key)
      mid = lo;
    if (cand[hi].key < cand[mid].key) {
      mid = hi;
      if (cand[lo].key > cand[mid].key)
        mid = lo;
    }

    /* park the pivot at the end of the current range */
    QSSWAP(cand[mid], cand[hi], stmp);
    pivot = cand[hi].key;

    /* move every key <= pivot to the front of the range */
    i = lo;
    for (j=lo; j<hi; j++) {
      if (cand[j].key <= pivot) {
        QSSWAP(cand[i], cand[j], stmp);
        i++;
      }
    }
    /* drop the pivot right behind them */
    QSSWAP(cand[i], cand[hi], stmp);

    /* continue in the side that contains position topk */
    if (i > topk)
      hi = i-1;
    else if (i < topk)
      lo = i+1;
    else
      break;
  }

  return topk;
}

225
fs.c

@ -0,0 +1,225 @@
/*!
\file fs.c
\brief Various file-system functions.
This file contains various functions that deal with interfacing with
the filesystem in a portable way.
\date Started 4/10/95
\author George
\version\verbatim $Id: fs.c 14332 2013-05-18 12:22:57Z karypis $ \endverbatim
*/
#include <GKlib.h>
/*************************************************************************
* Tells whether fname names an existing regular file.  Returns 0 when
* stat() fails or the entry is not a regular file, non-zero otherwise.
**************************************************************************/
int gk_fexists(char *fname)
{
  struct stat info;

  return (stat(fname, &info) == -1 ? 0 : S_ISREG(info.st_mode));
}
/*************************************************************************
* Tells whether dirname names an existing directory.  Returns 0 when
* stat() fails or the entry is not a directory, non-zero otherwise.
**************************************************************************/
int gk_dexists(char *dirname)
{
  struct stat info;

  return (stat(dirname, &info) == -1 ? 0 : S_ISDIR(info.st_mode));
}
/*************************************************************************/
/*! \brief Returns the size of the file in bytes

    The size reported by stat() is returned as a signed value.  If the file
    cannot be stat'ed, -1 is returned.

    \note Because -1 is reserved for errors, the largest size that can be
          reported is one bit short of the full width of the return type.
*/
/**************************************************************************/
ssize_t gk_getfsize(char *filename)
{
  struct stat info;
  int rc = stat(filename, &info);

  if (rc == -1)
    return -1;

  return (size_t)(info.st_size);
}
/*************************************************************************/
/*! This function gets some basic statistics about the file.

    Tokens are maximal runs of bytes other than ' ', '\\t' and '\\n'.
    Lines are counted by the number of '\\n' bytes, so a final line that
    is not newline-terminated adds tokens but not a line.

    \param fname is the name of the file
    \param r_nlines is the number of lines in the file. If it is NULL,
           this information is not returned.
    \param r_ntokens is the number of tokens in the file. If it is NULL,
           this information is not returned.
    \param r_max_nlntokens is the maximum number of tokens in any line
           in the file. If it is NULL this information is not returned.
    \param r_nbytes is the number of bytes in the file. If it is NULL,
           this information is not returned.

    \note The file is processed by byte count, so bytes following an
          embedded NUL are still examined. Reading stops as soon as
          fread() delivers no data, which covers both end-of-file and
          read errors.
*/
/*************************************************************************/
void gk_getfilestats(char *fname, size_t *r_nlines, size_t *r_ntokens,
        size_t *r_max_nlntokens, size_t *r_nbytes)
{
  size_t nlines=0, ntokens=0, max_nlntokens=0, nbytes=0, oldntokens=0, nread, i;
  int intoken=0;
  char buffer[4096];
  FILE *fpin;

  fpin = gk_fopen(fname, "r", "gk_GetFileStats");

  /* Loop on the amount of data actually read; testing feof() alone would
     spin forever if fread() keeps failing with an error. */
  while ((nread = fread(buffer, sizeof(char), sizeof(buffer), fpin)) > 0) {
    nbytes += nread;

    for (i=0; i<nread; i++) {
      if (buffer[i] == '\n') {
        nlines++;
        ntokens += intoken;   /* close the token that ends at this newline */
        intoken = 0;
        if (max_nlntokens < ntokens-oldntokens)
          max_nlntokens = ntokens-oldntokens;
        oldntokens = ntokens;
      }
      else if (buffer[i] == ' ' || buffer[i] == '\t') {
        ntokens += intoken;   /* close the token that ends at this blank */
        intoken = 0;
      }
      else {
        intoken = 1;
      }
    }
  }

  /* Account for a trailing token/line that is not newline-terminated */
  ntokens += intoken;
  if (max_nlntokens < ntokens-oldntokens)
    max_nlntokens = ntokens-oldntokens;

  gk_fclose(fpin);

  if (r_nlines != NULL)
    *r_nlines = nlines;
  if (r_ntokens != NULL)
    *r_ntokens = ntokens;
  if (r_max_nlntokens != NULL)
    *r_max_nlntokens = max_nlntokens;
  if (r_nbytes != NULL)
    *r_nbytes = nbytes;
}
/*************************************************************************
* Returns a newly gk_strdup'ed copy of the basename of path: the text
* after the last '/' (or all of path when it has no '/'), cut at the
* last '.' of that text, if any.
**************************************************************************/
char *gk_getbasename(char *path)
{
  char *slash  = strrchr(path, '/');
  char *result = gk_strdup(slash == NULL ? path : slash+1);
  char *dot    = strrchr(result, '.');

  /* drop the trailing ".ext" part in the copy, never in the caller's string */
  if (dot != NULL)
    *dot = '\0';

  return result;
}
/*************************************************************************
* This function takes in a potentially full path specification of a file
* and returns a newly gk_strdup'ed string corresponding to its file
* extension. The extension of a file is considered to be the string right
* after the last '.' character of the final path component.
*
* A '.' that belongs to a directory name (i.e., one that is followed by
* a '/') is not treated as an extension separator. When the final
* component has no '.', a copy of the whole path is returned, which is
* what this function has always done for dot-less paths.
**************************************************************************/
char *gk_getextname(char *path)
{
  char *dot = strrchr(path, '.');

  /* A '/' after the last '.' means the dot is part of a directory name */
  if (dot == NULL || strchr(dot, '/') != NULL)
    return gk_strdup(path);

  return gk_strdup(dot+1);
}
/*************************************************************************
* Returns a newly gk_strdup'ed copy of the filename part of path, i.e.,
* everything after the last '/'. A path without any '/' is copied whole.
**************************************************************************/
char *gk_getfilename(char *path)
{
  char *slash = strrchr(path, '/');

  return gk_strdup(slash != NULL ? slash+1 : path);
}
/*************************************************************************
* This function takes in a potentially full path specification of a file
* and extracts the directory path component, i.e., everything before the
* last '/'. The result is always a newly gk_strdup'ed string:
*   - "."  when path contains no '/',
*   - "/"  when the only '/' is the leading one (e.g., "/file"), so that
*          a file in the root directory does not yield an empty path,
*   - the text preceding the last '/' otherwise.
**************************************************************************/
char *getpathname(char *path)
{
  char *slash, *dir;

  if ((slash = strrchr(path, '/')) == NULL)
    return gk_strdup(".");

  /* The last '/' is the first character: the directory is the root */
  if (slash == path)
    return gk_strdup("/");

  dir = gk_strdup(path);
  dir[slash - path] = '\0';   /* cut the copy at the last '/' */

  return dir;
}
/*************************************************************************
* This function creates a path by running "mkdir -p <pathname>" through
* system().
*
* Returns the value of system(), or -1 without running anything when
* pathname is NULL or too long for the command to fit in the internal
* buffer (previously such a pathname overflowed the buffer).
*
* NOTE(review): pathname is handed to the shell unquoted, so blanks and
* shell metacharacters in it are interpreted by the shell. Callers must
* not pass untrusted input.
**************************************************************************/
int gk_mkpath(char *pathname)
{
  static const char prefix[] = "mkdir -p ";
  char tmp[2048];

  /* sizeof(prefix) counts the terminating NUL, so this is an exact fit test */
  if (pathname == NULL || strlen(pathname) > sizeof(tmp) - sizeof(prefix))
    return -1;

  sprintf(tmp, "%s%s", prefix, pathname);

  return system(tmp);
}
/*************************************************************************
* This function deletes a directory tree and all of its contents by
* running "rm -r <pathname>" through system().
*
* Returns the value of system(), or -1 without running anything when
* pathname is NULL or too long for the command to fit in the internal
* buffer (previously such a pathname overflowed the buffer).
*
* NOTE(review): pathname is handed to the shell unquoted, so blanks and
* shell metacharacters in it are interpreted by the shell. Callers must
* not pass untrusted input.
**************************************************************************/
int gk_rmpath(char *pathname)
{
  static const char prefix[] = "rm -r ";
  char tmp[2048];

  /* sizeof(prefix) counts the terminating NUL, so this is an exact fit test */
  if (pathname == NULL || strlen(pathname) > sizeof(tmp) - sizeof(prefix))
    return -1;

  sprintf(tmp, "%s%s", prefix, pathname);

  return system(tmp);
}

@ -0,0 +1,855 @@
/*************************************************************************/
/*! \file getopt.c
\brief Command line parsing
This file contains an implementation of GNU's Getopt facility. The purpose
for including it here is to ensure portability across different unix- and
windows-based systems.
\warning
The implementation provided here uses the \c gk_ prefix for all variables
used by the standard Getopt facility to communicate with the program.
So, do read the documentation here.
\verbatim
Copyright (C) 1987,88,89,90,91,92,93,94,95,96,98,99,2000,2001
Free Software Foundation, Inc. This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA.
\endverbatim
*/
/*************************************************************************/
#include <GKlib.h>
/*************************************************************************/
/* Local function prototypes */
/*************************************************************************/
/* Swaps the skipped non-options in ARGV with the options scanned after them. */
static void exchange (char **);
/* Resets the scanner state, selects the ordering mode, and returns the option
   string with a leading `-' or `+' mode character skipped. */
static char *gk_getopt_initialize (int, char **, char *);
/* Common engine called by gk_getopt(), gk_getopt_long() and
   gk_getopt_long_only(). */
static int gk_getopt_internal(int argc, char **argv, char *optstring,
struct gk_option *longopts, int *longind, int long_only);
/*************************************************************************/
/*! \brief Communicates option arguments to the caller.
This variable is set by getopt to point at the value of the option argument,
for those options that accept arguments. It is reset to NULL at the start
of every scan step.
*/
/*************************************************************************/
char *gk_optarg;
/*************************************************************************/
/*! \brief Index in ARGV of the next element to be scanned.
This variable is set by getopt to the index of the next element of the argv
array to be processed. Once getopt has found all of the option arguments,
you can use this variable to determine where the remaining non-option arguments
begin. Setting it to 0 before a call forces the scanner to re-initialize.
*/
/*************************************************************************/
int gk_optind = 1;
/*************************************************************************/
/*! \brief Controls error reporting for unrecognized options.
If the value of this variable is nonzero, then getopt prints an error
message to the standard error stream if it encounters an unknown option
character or an option with a missing required argument. This is the default
behavior. If you set this variable to zero, getopt does not print any messages,
but it still returns the character ? to indicate an error.
*/
/*************************************************************************/
int gk_opterr = 1;
/*************************************************************************/
/*! \brief Stores unknown option characters
When getopt encounters an unknown option character or an option with a
missing required argument, it stores that option character in this
variable. You can use this for providing your own diagnostic messages.
For an ambiguous or unrecognized long option it is set to 0.
*/
/*************************************************************************/
int gk_optopt = '?';
/*************************************************************************/
/*
Records that the getopt facility has been initialized. It is set to 1 by
gk_getopt_internal() right after gk_getopt_initialize() has been called.
*/
/*************************************************************************/
int gk_getopt_initialized;
/*************************************************************************/
/*
The next char to be scanned in the option-element in which the last option
character we returned was found. This allows us to pick up the scan where
we left off.
If this is zero, or a null string, it means resume the scan by advancing
to the next ARGV-element.
*/
/*************************************************************************/
static char *nextchar;
/*************************************************************************/
/*
Value of POSIXLY_CORRECT environment variable, as returned by getenv() in
gk_getopt_initialize().
*/
/*************************************************************************/
static char *posixly_correct;
/*************************************************************************/
/*
Describe how to deal with options that follow non-option ARGV-elements.
If the caller did not specify anything, the default is REQUIRE_ORDER if
the environment variable POSIXLY_CORRECT is defined, PERMUTE otherwise.
REQUIRE_ORDER means don't recognize them as options; stop option processing
when the first non-option is seen. This is what Unix does. This mode of
operation is selected by either setting the environment variable
POSIXLY_CORRECT, or using `+' as the first character of the list of
option characters.
PERMUTE is the default. We permute the contents of ARGV as we scan, so
that eventually all the non-options are at the end. This allows options
to be given in any order, even with programs that were not written to
expect this.
RETURN_IN_ORDER is an option available to programs that were written
to expect options and other ARGV-elements in any order and that care
about the ordering of the two. We describe each non-option ARGV-element
as if it were the argument of an option with character code 1.
Using `-' as the first character of the list of option characters
selects this mode of operation.
The special argument `--' forces an end of option-scanning regardless
of the value of `ordering'. In the case of RETURN_IN_ORDER, only
`--' can cause `getopt' to return -1 with `gk_optind' != ARGC.
*/
/*************************************************************************/
static enum
{
REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER
} ordering;
/*************************************************************************/
/*
Describe the part of ARGV that contains non-options that have
been skipped. `first_nonopt' is the index in ARGV of the first of them;
`last_nonopt' is the index after the last of them. Both are set to
`gk_optind' by gk_getopt_initialize(), i.e., the range starts out empty.
*/
/*************************************************************************/
static int first_nonopt;
static int last_nonopt;
/*************************************************************************/
/*
  Permutes ARGV so that two adjacent runs trade places:
    [first_nonopt, last_nonopt)  the non-options skipped so far, and
    [last_nonopt, gk_optind)     the options processed after them.
  Afterwards the options come first and the non-options follow them.
  `first_nonopt' and `last_nonopt' are updated to the new position of the
  non-option run.
*/
/*************************************************************************/
static void exchange (char **argv)
{
  int lo  = first_nonopt;   /* start of the part still to be rearranged */
  int mid = last_nonopt;    /* boundary between the two runs */
  int hi  = gk_optind;      /* one past the end of the part to rearrange */
  int k, nswap;
  char *held;

  /* Repeatedly swap the shorter run with the far end of the longer one.
     That parks the shorter run in its final place; what is left of the
     longer run is two pieces that still need swapping, handled by the
     next iteration. */
  for (;;) {
    if (hi <= mid || mid <= lo)
      break;

    if (hi - mid > mid - lo) {
      /* The lower run is the shorter one: trade it with the tail of the
         upper run, then drop that tail from consideration. */
      nswap = mid - lo;
      for (k = 0; k < nswap; k++) {
        held                 = argv[lo + k];
        argv[lo + k]         = argv[hi - nswap + k];
        argv[hi - nswap + k] = held;
      }
      hi -= nswap;
    }
    else {
      /* The upper run is the shorter one: trade it with the head of the
         lower run, then drop that head from consideration. */
      nswap = hi - mid;
      for (k = 0; k < nswap; k++) {
        held          = argv[lo + k];
        argv[lo + k]  = argv[mid + k];
        argv[mid + k] = held;
      }
      lo += nswap;
    }
  }

  /* The non-options now sit directly below gk_optind. */
  first_nonopt += (gk_optind - last_nonopt);
  last_nonopt = gk_optind;
}
/*************************************************************************/
/*
  Sets up the scanner state for a fresh pass over ARGV: empties the range
  of skipped non-options, clears the in-element scan position, reads
  POSIXLY_CORRECT, and selects the ordering mode.
  Returns OPTSTRING, advanced past a leading `-' or `+' when one of them
  selected the ordering mode. ARGC and ARGV are not used.
*/
/*************************************************************************/
static char *gk_getopt_initialize (int argc, char **argv, char *optstring)
{
  /* Nothing has been skipped yet: the non-option range is empty and sits
     at the current scan position. */
  first_nonopt = gk_optind;
  last_nonopt  = gk_optind;

  nextchar = NULL;

  posixly_correct = getenv("POSIXLY_CORRECT");

  /* An explicit mode character wins over the environment. */
  switch (optstring[0]) {
    case '-':
      ordering = RETURN_IN_ORDER;
      return optstring + 1;

    case '+':
      ordering = REQUIRE_ORDER;
      return optstring + 1;

    default:
      ordering = (posixly_correct != NULL ? REQUIRE_ORDER : PERMUTE);
      return optstring;
  }
}
/*************************************************************************/
/*
Scan elements of ARGV (whose length is ARGC) for option characters
given in OPTSTRING.
If an element of ARGV starts with '-', and is not exactly "-" or "--",
then it is an option element. The characters of this element
(aside from the initial '-') are option characters. If `getopt'
is called repeatedly, it returns successively each of the option characters
from each of the option elements.
If `getopt' finds another option character, it returns that character,
updating `gk_optind' and `nextchar' so that the next call to `getopt' can
resume the scan with the following option character or ARGV-element.
If there are no more option characters, `getopt' returns -1.
Then `gk_optind' is the index in ARGV of the first ARGV-element
that is not an option. (The ARGV-elements have been permuted
so that those that are not options now come last.)
OPTSTRING is a string containing the legitimate option characters.
If an option character is seen that is not listed in OPTSTRING,
return '?' after printing an error message. If you set `gk_opterr' to
zero, the error message is suppressed but we still return '?'.
If a char in OPTSTRING is followed by a colon, that means it wants an arg,
so the following text in the same ARGV-element, or the text of the following
ARGV-element, is returned in `gk_optarg'. Two colons mean an option that
wants an optional arg; if there is text in the current ARGV-element,
it is returned in `gk_optarg', otherwise `gk_optarg' is set to zero.
If OPTSTRING starts with `-' or `+', it requests different methods of
handling the non-option ARGV-elements.
See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above.
Long-named options begin with `--' instead of `-'.
Their names may be abbreviated as long as the abbreviation is unique
or is an exact match for some defined option. If they have an
argument, it follows the option name in the same ARGV-element, separated
from the option name by a `=', or else in the next ARGV-element.
When `getopt' finds a long-named option, it returns 0 if that option's
`flag' field is nonzero, the value of the option's `val' field
if the `flag' field is zero.
LONGOPTS is a vector of `struct gk_option' terminated by an
element containing a name which is zero.
LONGIND returns the index in LONGOPT of the long-named option found.
It is only valid when a long-named option has been found by the most
recent call.
If LONG_ONLY is nonzero, '-' as well as '--' can introduce
long-named options.
*/
/*************************************************************************/
static int gk_getopt_internal(int argc, char **argv, char *optstring,
struct gk_option *longopts, int *longind, int long_only)
{
int print_errors = gk_opterr;
if (optstring[0] == ':')
print_errors = 0;
if (argc < 1)
return -1;
gk_optarg = NULL;
if (gk_optind == 0 || !gk_getopt_initialized) {
if (gk_optind == 0)
gk_optind = 1; /* Don't scan ARGV[0], the program name. */
optstring = gk_getopt_initialize (argc, argv, optstring);
gk_getopt_initialized = 1;
}
/* Test whether ARGV[gk_optind] points to a non-option argument.
Either it does not have option syntax, or there is an environment flag
from the shell indicating it is not an option. The later information
is only used when the used in the GNU libc. */
# define NONOPTION_P (argv[gk_optind][0] != '-' || argv[gk_optind][1] == '\0')
if (nextchar == NULL || *nextchar == '\0') {
/* Advance to the next ARGV-element. */
/* Give FIRST_NONOPT & LAST_NONOPT rational values if OPTIND has been
moved back by the user (who may also have changed the arguments). */
if (last_nonopt > gk_optind)
last_nonopt = gk_optind;
if (first_nonopt > gk_optind)
first_nonopt = gk_optind;
if (ordering == PERMUTE) {
/* If we have just processed some options following some non-options,
exchange them so that the options come first. */
if (first_nonopt != last_nonopt && last_nonopt != gk_optind)
exchange ((char **) argv);
else if (last_nonopt != gk_optind)
first_nonopt = gk_optind;
/* Skip any additional non-options
and extend the range of non-options previously skipped. */
while (gk_optind < argc && NONOPTION_P)
gk_optind++;
last_nonopt = gk_optind;
}
/* The special ARGV-element `--' means premature end of options.
Skip it like a null option,
then exchange with previous non-options as if it were an option,
then skip everything else like a non-option. */
if (gk_optind != argc && !strcmp (argv[gk_optind], "--")) {
gk_optind++;
if (first_nonopt != last_nonopt && last_nonopt != gk_optind)
exchange ((char **) argv);
else if (first_nonopt == last_nonopt)
first_nonopt = gk_optind;
last_nonopt = argc;
gk_optind = argc;
}
/* If we have done all the ARGV-elements, stop the scan
and back over any non-options that we skipped and permuted. */
if (gk_optind == argc) {
/* Set the next-arg-index to point at the non-options
that we previously skipped, so the caller will digest them. */
if (first_nonopt != last_nonopt)
gk_optind = first_nonopt;
return -1;
}
/* If we have come to a non-option and did not permute it,
either stop the scan or describe it to the caller and pass it by. */
if (NONOPTION_P) {
if (ordering == REQUIRE_ORDER)
return -1;
gk_optarg = argv[gk_optind++];
return 1;
}
/* We have found another option-ARGV-element.
Skip the initial punctuation. */
nextchar = (argv[gk_optind] + 1 + (longopts != NULL && argv[gk_optind][1] == '-'));
}
/* Decode the current option-ARGV-element. */
/* Check whether the ARGV-element is a long option.
If long_only and the ARGV-element has the form "-f", where f is
a valid short option, don't consider it an abbreviated form of
a long option that starts with f. Otherwise there would be no
way to give the -f short option.
On the other hand, if there's a long option "fubar" and
the ARGV-element is "-fu", do consider that an abbreviation of
the long option, just like "--fu", and not "-f" with arg "u".
This distinction seems to be the most useful approach. */
if (longopts != NULL && (argv[gk_optind][1] == '-' || (long_only && (argv[gk_optind][2] || !strchr(optstring, argv[gk_optind][1]))))) {
char *nameend;
struct gk_option *p;
struct gk_option *pfound = NULL;
int exact = 0;
int ambig = 0;
int indfound = -1;
int option_index;
for (nameend = nextchar; *nameend && *nameend != '='; nameend++)
/* Do nothing. */ ;
/* Test all long options for either exact match or abbreviated matches. */
for (p = longopts, option_index = 0; p->name; p++, option_index++) {
if (!strncmp (p->name, nextchar, nameend - nextchar)) {
if ((unsigned int) (nameend - nextchar) == (unsigned int) strlen (p->name)) {
/* Exact match found. */
pfound = p;
indfound = option_index;
exact = 1;
break;
}
else if (pfound == NULL) {
/* First nonexact match found. */
pfound = p;
indfound = option_index;
}
else if (long_only || pfound->has_arg != p->has_arg || pfound->flag != p->flag || pfound->val != p->val)
/* Second or later nonexact match found. */
ambig = 1;
}
}
if (ambig && !exact) {
if (print_errors)
fprintf(stderr, "%s: option `%s' is ambiguous\n", argv[0], argv[gk_optind]);
nextchar += strlen (nextchar);
gk_optind++;
gk_optopt = 0;
return '?';
}
if (pfound != NULL) {
option_index = indfound;
gk_optind++;
if (*nameend) {
/* Don't test has_arg with >, because some C compilers don't allow it to be used on enums. */
if (pfound->has_arg)
gk_optarg = nameend + 1;
else {
if (print_errors) {
if (argv[gk_optind - 1][1] == '-')
/* --option */
fprintf(stderr, "%s: option `--%s' doesn't allow an argument\n", argv[0], pfound->name);
else
/* +option or -option */
fprintf(stderr, "%s: option `%c%s' doesn't allow an argument\n", argv[0], argv[gk_optind - 1][0], pfound->name);
}
nextchar += strlen (nextchar);
gk_optopt = pfound->val;
return '?';
}
}
else if (pfound->has_arg == 1) {
if (gk_optind < argc)
gk_optarg = argv[gk_optind++];
else {
if (print_errors)
fprintf(stderr, "%s: option `%s' requires an argument\n", argv[0], argv[gk_optind - 1]);
nextchar += strlen (nextchar);
gk_optopt = pfound->val;
return optstring[0] == ':' ? ':' : '?';
}
}
nextchar += strlen (nextchar);
if (longind != NULL)
*longind = option_index;
if (pfound->flag) {
*(pfound->flag) = pfound->val;
return 0;
}
return pfound->val;
}
/* Can't find it as a long option. If this is not getopt_long_only,
or the option starts with '--' or is not a valid short
option, then it's an error. Otherwise interpret it as a short option. */
if (!long_only || argv[gk_optind][1] == '-' || strchr(optstring, *nextchar) == NULL) {
if (print_errors) {
if (argv[gk_optind][1] == '-')
/* --option */
fprintf(stderr, "%s: unrecognized option `--%s'\n", argv[0], nextchar);
else
/* +option or -option */
fprintf(stderr, "%s: unrecognized option `%c%s'\n", argv[0], argv[gk_optind][0], nextchar);
}
nextchar = (char *) "";
gk_optind++;
gk_optopt = 0;
return '?';
}
}
/* Look at and handle the next short option-character. */
{
char c = *nextchar++;
char *temp = strchr(optstring, c);
/* Increment `gk_optind' when we start to process its last character. */
if (*nextchar == '\0')
++gk_optind;
if (temp == NULL || c == ':') {
if (print_errors) {
if (posixly_correct)
/* 1003.2 specifies the format of this message. */
fprintf(stderr, "%s: illegal option -- %c\n", argv[0], c);
else
fprintf(stderr, "%s: invalid option -- %c\n", argv[0], c);
}
gk_optopt = c;
return '?';
}
/* Convenience. Treat POSIX -W foo same as long option --foo */
if (temp[0] == 'W' && temp[1] == ';') {
char *nameend;
struct gk_option *p;
struct gk_option *pfound = NULL;
int exact = 0;
int ambig = 0;
int indfound = 0;
int option_index;
/* This is an option that requires an argument. */
if (*nextchar != '\0') {
gk_optarg = nextchar;
/* If we end this ARGV-element by taking the rest as an arg,
we must advance to the next element now. */
gk_optind++;
}
else if (gk_optind == argc) {
if (print_errors) {
/* 1003.2 specifies the format of this message. */
fprintf(stderr, "%s: option requires an argument -- %c\n", argv[0], c);
}
gk_optopt = c;
if (optstring[0] == ':')
c = ':';
else
c = '?';
return c;
}
else
/* We already incremented `gk_optind' once; increment it again when taking next ARGV-elt as argument. */
gk_optarg = argv[gk_optind++];
/* gk_optarg is now the argument, see if it's in the table of longopts. */
for (nextchar = nameend = gk_optarg; *nameend && *nameend != '='; nameend++)
/* Do nothing. */ ;
/* Test all long options for either exact match or abbreviated matches. */
for (p = longopts, option_index = 0; p->name; p++, option_index++) {
if (!strncmp (p->name, nextchar, nameend - nextchar)) {
if ((unsigned int) (nameend - nextchar) == strlen (p->name)) {
/* Exact match found. */
pfound = p;
indfound = option_index;
exact = 1;
break;
}
else if (pfound == NULL) {
/* First nonexact match found. */
pfound = p;
indfound = option_index;
}
else
/* Second or later nonexact match found. */
ambig = 1;
}
}
if (ambig && !exact) {
if (print_errors)
fprintf(stderr, "%s: option `-W %s' is ambiguous\n", argv[0], argv[gk_optind]);
nextchar += strlen (nextchar);
gk_optind++;
return '?';
}
if (pfound != NULL) {
option_index = indfound;
if (*nameend) {
/* Don't test has_arg with >, because some C compilers don't allow it to be used on enums. */
if (pfound->has_arg)
gk_optarg = nameend + 1;
else {
if (print_errors)
fprintf(stderr, "%s: option `-W %s' doesn't allow an argument\n", argv[0], pfound->name);
nextchar += strlen (nextchar);
return '?';
}
}
else if (pfound->has_arg == 1) {
if (gk_optind < argc)
gk_optarg = argv[gk_optind++];
else {
if (print_errors)
fprintf(stderr, "%s: option `%s' requires an argument\n", argv[0], argv[gk_optind - 1]);
nextchar += strlen (nextchar);
return optstring[0] == ':' ? ':' : '?';
}
}
nextchar += strlen (nextchar);
if (longind != NULL)
*longind = option_index;
if (pfound->flag) {
*(pfound->flag) = pfound->val;
return 0;
}
return pfound->val;
}
nextchar = NULL;
return 'W'; /* Let the application handle it. */
}
if (temp[1] == ':') {
if (temp[2] == ':') {
/* This is an option that accepts an argument optionally. */
if (*nextchar != '\0') {
gk_optarg = nextchar;
gk_optind++;
}
else
gk_optarg = NULL;
nextchar = NULL;
}
else {
/* This is an option that requires an argument. */
if (*nextchar != '\0') {
gk_optarg = nextchar;
/* If we end this ARGV-element by taking the rest as an arg, we must advance to the next element now. */
gk_optind++;
}
else if (gk_optind == argc) {
if (print_errors) {
/* 1003.2 specifies the format of this message. */
fprintf(stderr, "%s: option requires an argument -- %c\n", argv[0], c);
}
gk_optopt = c;
if (optstring[0] == ':')
c = ':';
else
c = '?';
}
else
/* We already incremented `gk_optind' once; increment it again when taking next ARGV-elt as argument. */
gk_optarg = argv[gk_optind++];
nextchar = NULL;
}
}
return c;
}
}
/*************************************************************************/
/*! \brief Parse command-line arguments
The gk_getopt() function gets the next option argument from the argument
list specified by the \c argv and \c argc arguments. Normally these values
come directly from the arguments received by main().
\param argc is the number of command line arguments passed to main().
\param argv is an array of strings storing the above command line
arguments.
\param options is a string that specifies the option characters that
are valid for this program. An option character in this string
can be followed by a colon (`:') to indicate that it takes a
required argument. If an option character is followed by two
colons (`::'), its argument is optional; this is a GNU extension.
\return
It returns the option character for the next command line option. When no
more option arguments are available, it returns -1. There may still be
more non-option arguments; you must compare the external variable
#gk_optind against the \c argc parameter to check this.
\return
If the option has an argument, gk_getopt() returns the argument by storing
it in the variable #gk_optarg. You don't ordinarily need to copy the
#gk_optarg string, since it is a pointer into the original \c argv array,
not into a static area that might be overwritten.
\return
If gk_getopt() finds an option character in \c argv that was not included
in options, or a missing option argument, it returns `?' and sets the
external variable #gk_optopt to the actual option character.
If the first character of options is a colon (`:'), then gk_getopt()
returns `:' instead of `?' to indicate a missing option argument.
In addition, if the external variable #gk_opterr is nonzero (which is
the default), gk_getopt() prints an error message. The variable #gk_optarg
is set by gk_getopt() to point at the value of the option argument,
for those options that accept arguments.
gk_getopt() has three ways to deal with options that follow non-options
\c argv elements. The special argument <tt>`--'</tt> forces in all cases
the end of option scanning.
- The default is to permute the contents of \c argv while scanning it
so that eventually all the non-options are at the end. This allows
options to be given in any order, even with programs that were not
written to expect this.
- If the options argument string begins with a hyphen (`-'), this is
treated specially. It permits arguments that are not options to be
returned as if they were associated with option character `\\1'.
- POSIX demands the following behavior: The first non-option stops
option processing. This mode is selected by either setting the
environment variable POSIXLY_CORRECT or beginning the options
argument string with a plus sign (`+').
*/
/*************************************************************************/
int gk_getopt(int argc, char **argv, char *options)
{
  int status;

  /* Short options only: no long-option table, no index to report back,
     and long-only mode switched off. */
  status = gk_getopt_internal(argc, argv, options, NULL, NULL, 0);

  return status;
}
/*************************************************************************/
/*! \brief Parse command-line arguments with long options
This function accepts GNU-style long options as well as single-character
options.
\param argc is the number of command line arguments passed to main().
\param argv is an array of strings storing the above command line
arguments.
\param options describes the short options to accept, just as it does
in gk_getopt().
\param long_options describes the long options to accept. See the
definition of ::gk_option for more information.
\param opt_index this is a returned variable. For any long option,
gk_getopt_long() tells you the index in the array \c long_options
of the options definition, by storing it into <tt>*opt_index</tt>.
You can get the name of the option with <tt>longopts[*opt_index].name</tt>.
So you can distinguish among long options either by the values
in their val fields or by their indices. You can also distinguish
in this way among long options that set flags.
\return
When gk_getopt_long() encounters a short option, it does the same thing
that gk_getopt() would do: it returns the character code for the option,
and stores the options argument (if it has one) in #gk_optarg.
\return
When gk_getopt_long() encounters a long option, it takes actions based
on the flag and val fields of the definition of that option.
\return
If flag is a null pointer, then gk_getopt_long() returns the contents
of val to indicate which option it found. You should arrange distinct
values in the val field for options with different meanings, so you
can decode these values after gk_getopt_long() returns. If the long
option is equivalent to a short option, you can use the short option's
character code in val.
\return
If flag is not a null pointer, that means this option should just set
a flag in the program. The flag is a variable of type int that you
define. Put the address of the flag in the flag field. Put in the
val field the value you would like this option to store in the flag.
In this case, gk_getopt_long() returns 0.
\return
When a long option has an argument, gk_getopt_long() puts the argument
value in the variable #gk_optarg before returning. When the option has
no argument, the value in #gk_optarg is a null pointer. This is
how you can tell whether an optional argument was supplied.
\return
When gk_getopt_long() has no more options to handle, it returns -1,
and leaves in the variable #gk_optind the index in argv of the next
remaining argument.
*/
/*************************************************************************/
int gk_getopt_long(int argc, char **argv, char *options,
                   struct gk_option *long_options, int *opt_index)
{
  int status;

  /* The trailing 0 selects the regular mode; gk_getopt_long_only() passes 1
     to additionally accept long options introduced by a single '-'. */
  status = gk_getopt_internal(argc, argv, options, long_options, opt_index, 0);

  return status;
}
/*************************************************************************/
/*! \brief Parse command-line arguments, also accepting long options after a single '-'

Works like gk_getopt_long(), except that a long option may be introduced by
'-' as well as by '--'. If an argument that starts with '-' (not '--') does
not match any long option but does match a short option, it is parsed as a
short option instead.
*/
/*************************************************************************/
int gk_getopt_long_only(int argc, char **argv, char *options,
                        struct gk_option *long_options, int *opt_index)
{
  int status;

  /* The trailing 1 is what distinguishes this entry point from gk_getopt_long() */
  status = gk_getopt_internal(argc, argv, options, long_options, opt_index, 1);

  return status;
}

@ -0,0 +1,70 @@
/*!
\file gk_arch.h
\brief This file contains various architecture-specific declarations
\date Started 3/27/2007
\author George
\version\verbatim $Id: gk_arch.h 21637 2018-01-03 22:37:24Z karypis $ \endverbatim
*/
#ifndef _GK_ARCH_H_
#define _GK_ARCH_H_
/*************************************************************************
* Architecture-specific differences in header files
**************************************************************************/
/* On LINUX builds, make sure the X/Open feature-test macros are defined
   ahead of the system headers included further down. Each macro is only
   defined here when it has not been set already. */
#ifdef LINUX
#if !defined(__USE_XOPEN)
#define __USE_XOPEN
#endif
#if !defined(_XOPEN_SOURCE)
#define _XOPEN_SOURCE 600
#endif
#if !defined(__USE_XOPEN2K)
#define __USE_XOPEN2K
#endif
#endif
/* <execinfo.h> is optional; HAVE_EXECINFO_H is presumably provided by the
   build configuration when the header exists -- TODO confirm. */
#ifdef HAVE_EXECINFO_H
#include <execinfo.h>
#endif
/* With __MSC__ the project-supplied gk_ms_*.h / win32 headers are used in
   place of the standard integer-type and POSIX headers of the #else branch. */
#ifdef __MSC__
#include "gk_ms_stdint.h"
#include "gk_ms_inttypes.h"
#include "gk_ms_stat.h"
#include "win32/adapt.h"
#else
/* <stdint.h> is skipped on SUNOS */
#ifndef SUNOS
#include <stdint.h>
#endif
#include <inttypes.h>
#include <sys/types.h>
/* <sys/resource.h> is skipped for MinGW builds */
#ifndef __MINGW32__
#include <sys/resource.h>
#endif
#include <sys/time.h>
#include <unistd.h>
#endif
/*************************************************************************
* Architecture-specific modifications
**************************************************************************/
/* NOTE(review): ptrdiff_t comes from <stddef.h>, which this header does not
   include itself -- confirm it is in scope wherever WIN32 is defined. */
#ifdef WIN32
typedef ptrdiff_t ssize_t;
#endif
#ifdef SUNOS
#define PTRDIFF_MAX INT64_MAX
#endif
/* MSC does not have INFINITY defined */
/* NOTE(review): the fallback expands to FLT_MAX (the largest finite float,
   not a true infinity). FLT_MAX is declared by <float.h>, which is not
   included here, so it must be in scope wherever INFINITY is expanded. */
#ifndef INFINITY
#define INFINITY FLT_MAX
#endif
#endif

@ -0,0 +1,87 @@
/*!
\file gk_defs.h
\brief This file contains various constants definitions
\date Started 3/27/2007
\author George
\version\verbatim $Id: gk_defs.h 22039 2018-05-26 16:34:48Z karypis $ \endverbatim
*/
#ifndef _GK_DEFS_H_
#define _GK_DEFS_H_
#define LTERM (void **) 0 /* List terminator for gk_free() */
/* mopt_t types */
#define GK_MOPT_MARK 1
#define GK_MOPT_CORE 2
#define GK_MOPT_HEAP 3
/* Hash-table constants. The two negative values look like slot sentinels and
   FIRST/NEXT like search modes -- TODO confirm against htable.c */
#define HTABLE_EMPTY -1
#define HTABLE_DELETED -2
#define HTABLE_FIRST 1
#define HTABLE_NEXT 2
/* pdb corruption bit switches (distinct powers of two, so they can be OR-ed) */
#define CRP_ALTLOCS 1
#define CRP_MISSINGCA 2
#define CRP_MISSINGBB 4
#define CRP_MULTICHAIN 8
#define CRP_MULTICA 16
#define CRP_MULTIBB 32
/* presumably the maximum length of an input line -- TODO confirm at the users */
#define MAXLINELEN 300000
/* GKlib signals to standard signal mapping */
#define SIGMEM SIGABRT
#define SIGERR SIGTERM
/* CSR-related defines */
/* ROW/COL/ROWCOL selectors */
#define GK_CSR_ROW 1
#define GK_CSR_COL 2
#define GK_CSR_ROWCOL 3
/* presumably value-scaling schemes -- TODO confirm against csr.c */
#define GK_CSR_MAXTF 1
#define GK_CSR_SQRT 2
#define GK_CSR_POW25 3
#define GK_CSR_POW65 4
#define GK_CSR_POW75 5
#define GK_CSR_POW85 6
#define GK_CSR_LOG 7
#define GK_CSR_IDF 8
#define GK_CSR_IDF2 9
#define GK_CSR_MAXTF2 10
/* presumably similarity measures -- TODO confirm against csr.c */
#define GK_CSR_DOTP 1
#define GK_CSR_COS 2
#define GK_CSR_JAC 3
#define GK_CSR_MIN 4
#define GK_CSR_AMIN 5
/* File-format identifiers.
   NOTE(review): GK_CSR_FMT_AUTO has the same value (2) as GK_CSR_FMT_CSR, so
   the two cannot be told apart at run time -- confirm this is intentional. */
#define GK_CSR_FMT_AUTO 2
#define GK_CSR_FMT_CLUTO 1
#define GK_CSR_FMT_CSR 2
#define GK_CSR_FMT_METIS 3
#define GK_CSR_FMT_BINROW 4
#define GK_CSR_FMT_BINCOL 5
#define GK_CSR_FMT_IJV 6
#define GK_CSR_FMT_BIJV 7
/* SYM_* selectors (SUM/MIN/MAX/AVG); same numbering as GK_GRAPH_SYM_* below */
#define GK_CSR_SYM_SUM 1
#define GK_CSR_SYM_MIN 2
#define GK_CSR_SYM_MAX 3
#define GK_CSR_SYM_AVG 4
/* Graph-related defines */
#define GK_GRAPH_FMT_METIS 1
#define GK_GRAPH_FMT_IJV 2
#define GK_GRAPH_FMT_HIJV 3
#define GK_GRAPH_SYM_SUM 1
#define GK_GRAPH_SYM_MIN 2
#define GK_GRAPH_SYM_MAX 3
#define GK_GRAPH_SYM_AVG 4
#endif

@ -0,0 +1,25 @@
/*!
\file gk_externs.h
\brief This file contains definitions of external variables created by GKlib
\date Started 3/27/2007
\author George
\version\verbatim $Id: gk_externs.h 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
*/
#ifndef _GK_EXTERNS_H_
#define _GK_EXTERNS_H_
/*************************************************************************
* Extern variable declarations. Each variable is marked __thread, i.e., it is
* declared with thread-local storage.
**************************************************************************/
/* The declarations are skipped when _GK_ERROR_C_ is defined (presumably by
   error.c itself, which holds the actual definitions -- TODO confirm).
   jmp_buf requires <setjmp.h> to be included before this header. */
#ifndef _GK_ERROR_C_
/* declared in error.c */
extern __thread int gk_cur_jbufs;
extern __thread jmp_buf gk_jbufs[];
extern __thread jmp_buf gk_jbuf;
#endif
#endif

@ -0,0 +1,64 @@
/*!
\file gk_getopt.h
\brief This file contains GNU's externs/structs/prototypes
\date Started 3/27/2007
\author George
\version\verbatim $Id: gk_getopt.h 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
*/
#ifndef _GK_GETOPT_H_
#define _GK_GETOPT_H_
/* Externals from getopt.c */
/* gk_optarg: argument of the option that was just parsed (null pointer when
   the option has none).
   gk_optind: index in argv of the next argument to be processed.
   gk_opterr / gk_optopt: presumably mirror GNU getopt's opterr/optopt --
   TODO confirm against getopt.c. */
extern char *gk_optarg;
extern int gk_optind;
extern int gk_opterr;
extern int gk_optopt;
/*! \brief The structure that stores the information about the command-line options
This structure describes a single long option name for the sake of
gk_getopt_long(). The argument <tt>long_options</tt> must be an array
of these structures, one for each long option. Terminate the array with
an element containing all zeros.
*/
struct gk_option {
char *name; /*!< This field is the name of the option. */
int has_arg; /*!< This field says whether the option takes an argument.
It is an integer, and there are three legitimate values:
no_argument, required_argument and optional_argument.
*/
int *flag; /*!< Either a null pointer or the address of an int that is set
to ::gk_option#val when the option is seen; see the
discussion on ::gk_option#val */
int val; /*!< Together with flag, this field controls how to report or
act on the option when it occurs.
If flag is a null pointer, then the val is a value which
identifies this option. Often these values are chosen
to uniquely identify particular long options.
If flag is not a null pointer, it should be the address
of an int variable which is the flag for this option.
The value in val is the value to store in the flag to
indicate that the option was seen. */
};
/* Names for the values of the `has_arg' field of `struct gk_option'. */
#define no_argument 0
#define required_argument 1
#define optional_argument 2
/* Function prototypes */
extern int gk_getopt(int argc, char **argv, char *shortopts);
extern int gk_getopt_long(int argc, char **argv, char *shortopts,
struct gk_option *longopts, int *longind);
extern int gk_getopt_long_only (int argc, char **argv,
char *shortopts, struct gk_option *longopts, int *longind);
#endif

@ -0,0 +1,169 @@
/*!
\file gk_macros.h
\brief This file contains various macros
\date Started 3/27/2007
\author George
\version\verbatim $Id: gk_macros.h 15048 2013-08-31 19:38:14Z karypis $ \endverbatim
*/
#ifndef _GK_MACROS_H_
#define _GK_MACROS_H_
/*-------------------------------------------------------------
 * Useful commands
 *
 * All of these are function-like macros: an argument may be
 * evaluated more than once, so do not pass expressions that
 * have side effects.
 *-------------------------------------------------------------*/
#define gk_max(a, b) ((a) >= (b) ? (a) : (b))
#define gk_min(a, b) ((a) >= (b) ? (b) : (a))
#define gk_max3(a, b, c) ((a) >= (b) && (a) >= (c) ? (a) : ((b) >= (a) && (b) >= (c) ? (b) : (c)))
/* Exchanges a and b using the caller-supplied temporary tmp */
#define gk_SWAP(a, b, tmp) do {(tmp) = (a); (a) = (b); (b) = (tmp);} while(0)
/* Adds val to a and subtracts it from b */
#define INC_DEC(a, b, val) do {(a) += (val); (b) -= (val);} while(0)
/* Evaluates to b when a is non-negative and to -b otherwise. Both arguments
   are parenthesized so that compound expressions such as sign(x, y+1) negate
   the whole second argument instead of only its first operand. */
#define sign(a, b) ((a) >= 0 ? (b) : -(b))
/* 1/(RAND_MAX+1) in double precision */
#define ONEOVERRANDMAX (1.0/(RAND_MAX+1.0))
/* Integer obtained by scaling rand() (or rand_r(s)) by u/(RAND_MAX+1); for a
   positive u the result lies in [0, u) */
#define RandomInRange(u) ((int) (ONEOVERRANDMAX*(u)*rand()))
#define RandomInRange_r(s, u) ((int) (ONEOVERRANDMAX*(u)*rand_r(s)))
#define gk_abs(x) ((x) >= 0 ? (x) : -(x))
/*-------------------------------------------------------------
* Timing macros
*
* A timer is a floating-point lvalue. "start" subtracts the current
* time and "stop" adds it back, so after a start/stop pair the timer
* has grown by the elapsed seconds; repeated pairs accumulate.
* The cpu variants read gk_CPUSeconds(), the wc variants read
* gk_WClockSeconds().
*-------------------------------------------------------------*/
#define gk_clearcputimer(tmr) (tmr = 0.0)
#define gk_startcputimer(tmr) (tmr -= gk_CPUSeconds())
#define gk_stopcputimer(tmr) (tmr += gk_CPUSeconds())
#define gk_getcputimer(tmr) (tmr)
#define gk_clearwctimer(tmr) (tmr = 0.0)
#define gk_startwctimer(tmr) (tmr -= gk_WClockSeconds())
#define gk_stopwctimer(tmr) (tmr += gk_WClockSeconds())
#define gk_getwctimer(tmr) (tmr)
/*-------------------------------------------------------------
* dbglvl handling macros
*-------------------------------------------------------------*/
/* Executes cmd when any bit of flag is set in a.
   NOTE(review): the expansion is a bare `if` that already ends in ';', so
   using it as the body of an if/else can bind the else unexpectedly. */
#define IFSET(a, flag, cmd) if ((a)&(flag)) (cmd);
/*-------------------------------------------------------------
* graceful library exit macro
*-------------------------------------------------------------*/
/* NOTE(review): gk_return_to_entry is not declared in the headers visible
   here -- confirm where it is defined before relying on GKSETJMP(). */
#define GKSETJMP() (setjmp(gk_return_to_entry))
/* Saves the calling context in the gk_jbufs slot selected by gk_cur_jbufs */
#define gk_sigcatch() (setjmp(gk_jbufs[gk_cur_jbufs]))
/*-------------------------------------------------------------
* Debugging memory leaks
*-------------------------------------------------------------*/
/* With DMALLOC defined, MALLOC_CHECK(ptr) calls malloc_verify() on ptr and,
   if it reports DMALLOC_VERIFY_ERROR, prints the source location and aborts.
   Without DMALLOC it expands to an empty statement. */
#ifdef DMALLOC
# define MALLOC_CHECK(ptr) \
if (malloc_verify((ptr)) == DMALLOC_VERIFY_ERROR) { \
printf("***MALLOC_CHECK failed on line %d of file %s: " #ptr "\n", \
__LINE__, __FILE__); \
abort(); \
}
#else
# define MALLOC_CHECK(ptr) ;
#endif
/*-------------------------------------------------------------
 * CSR conversion macros
 *
 * i must be an assignable integer variable (it is used as the
 * loop counter and is left at 0 afterwards). a must provide
 * room for n+1 entries, because a[n] is written. The n and a
 * arguments are parenthesized so that expressions such as
 * "ptr + k" or "cond ? x : y" can be passed safely.
 *-------------------------------------------------------------*/
/* MAKECSR: on entry a[0..n-1] holds counts; on exit a[0..n] holds the
   matching start offsets (a[0] == 0 and a[n] == sum of all counts). */
#define MAKECSR(i, n, a) \
  do { \
    for ((i)=1; (i)<(n); (i)++) (a)[i] += (a)[(i)-1]; \
    for ((i)=(n); (i)>0; (i)--) (a)[i] = (a)[(i)-1]; \
    (a)[0] = 0; \
  } while(0)
/* SHIFTCSR: moves a[0..n-1] one position to the right and sets a[0] to 0 */
#define SHIFTCSR(i, n, a) \
  do { \
    for ((i)=(n); (i)>0; (i)--) (a)[i] = (a)[(i)-1]; \
    (a)[0] = 0; \
  } while(0)
/*-------------------------------------------------------------
* ASSERTS that cannot be turned off!
*
* None of these macros depends on NDEBUG. They expand to a bare
* `if (...) { ... }` block.
* NOTE(review): because the expansion is not wrapped in
* do { } while(0), using one as the body of an if/else can bind
* the else unexpectedly.
*-------------------------------------------------------------*/
/* Prints the failing expression with its source location and aborts */
#define GKASSERT(expr) \
if (!(expr)) { \
printf("***ASSERTION failed on line %d of file %s: " #expr "\n", \
__LINE__, __FILE__); \
abort(); \
}
/* Like GKASSERT, but also prints msg, which must be a parenthesized printf
   argument list, e.g. GKASSERTP(x > 0, ("x=%d", x)) */
#define GKASSERTP(expr,msg) \
if (!(expr)) { \
printf("***ASSERTION failed on line %d of file %s: " #expr "\n", \
__LINE__, __FILE__); \
printf msg ; \
printf("\n"); \
abort(); \
}
/* Prints the same diagnostic as GKASSERT but does NOT abort */
#define GKCUASSERT(expr) \
if (!(expr)) { \
printf("***ASSERTION failed on line %d of file %s: " #expr "\n", \
__LINE__, __FILE__); \
}
/* Same expansion as GKCUASSERT: report only, execution continues */
#define GKWARN(expr) \
if (!(expr)) { \
printf("***ASSERTION failed on line %d of file %s: " #expr "\n", \
__LINE__, __FILE__); \
}
/* GKCUASSERT plus a parenthesized printf argument list; does not abort */
#define GKCUASSERTP(expr,msg) \
if (!(expr)) { \
printf("***ASSERTION failed on line %d of file %s: " #expr "\n", \
__LINE__, __FILE__); \
printf msg ; \
printf("\n"); \
}
/* Same expansion as GKCUASSERTP: report with message, execution continues */
#define GKWARNP(expr,msg) \
if (!(expr)) { \
printf("***ASSERTION failed on line %d of file %s: " #expr "\n", \
__LINE__, __FILE__); \
printf msg ; \
printf("\n"); \
}
/*-------------------------------------------------------------
* Program Assertions
*
* ASSERT/ASSERTP are active unless NDEBUG is defined, in which
* case they expand to an empty statement. When active they print
* the failing expression and then hand it to assert(), so expr is
* evaluated a second time on failure.
* ASSERTP additionally prints msg, a parenthesized printf
* argument list.
*-------------------------------------------------------------*/
#ifndef NDEBUG
# define ASSERT(expr) \
if (!(expr)) { \
printf("***ASSERTION failed on line %d of file %s: " #expr "\n", \
__LINE__, __FILE__); \
assert(expr); \
}
# define ASSERTP(expr,msg) \
if (!(expr)) { \
printf("***ASSERTION failed on line %d of file %s: " #expr "\n", \
__LINE__, __FILE__); \
printf msg ; \
printf("\n"); \
assert(expr); \
}
#else
# define ASSERT(expr) ;
# define ASSERTP(expr,msg) ;
#endif
/* ASSERT2/ASSERTP2 are a second assertion tier that is disabled by NDEBUG2.
   When NDEBUG2 is not defined they alias ASSERT/ASSERTP, so they are also
   inactive whenever NDEBUG is defined. */
#ifndef NDEBUG2
# define ASSERT2 ASSERT
# define ASSERTP2 ASSERTP
#else
# define ASSERT2(expr) ;
# define ASSERTP2(expr,msg) ;
#endif
#endif

@ -0,0 +1,203 @@
/*!
\file gk_mkblas.h
\brief Templates for BLAS-like routines
\date Started 3/28/07
\author George
\version\verbatim $Id: gk_mkblas.h 16304 2014-02-25 14:27:19Z karypis $ \endverbatim
*/
#ifndef _GK_MKBLAS_H_
#define _GK_MKBLAS_H_
#define GK_MKBLAS(PRFX, TYPE, OUTTYPE) \
/*************************************************************************/\
/*! gk_?incset(): stores baseval, baseval+1, ..., baseval+n-1 into\
    x[0..n-1] and returns x */\
/*************************************************************************/\
TYPE *PRFX ## incset(size_t n, TYPE baseval, TYPE *x)\
{\
  size_t pos = 0;\
\
  while (pos < n) {\
    x[pos] = baseval+pos;\
    pos++;\
  }\
\
  return x;\
}\
\
/*************************************************************************/\
/*! gk_?max(): returns the largest of the n elements x[0], x[incx],\
    x[2*incx], ...; returns (TYPE)0 when n is 0 */\
/*************************************************************************/\
TYPE PRFX ## max(size_t n, TYPE *x, size_t incx)\
{\
  size_t k;\
  TYPE best;\
\
  if (n == 0)\
    return (TYPE) 0;\
\
  best = (*x);\
  for (k=1; k<n; k++) {\
    x += incx;\
    if ((*x) > best)\
      best = (*x);\
  }\
\
  return best;\
}\
\
\
/*************************************************************************/\
/*! gk_?min(): returns the smallest of the n elements x[0], x[incx],\
    x[2*incx], ...; returns (TYPE)0 when n is 0 */\
/*************************************************************************/\
TYPE PRFX ## min(size_t n, TYPE *x, size_t incx)\
{\
  size_t k;\
  TYPE best;\
\
  if (n == 0)\
    return (TYPE) 0;\
\
  best = (*x);\
  for (k=1; k<n; k++) {\
    x += incx;\
    if ((*x) < best)\
      best = (*x);\
  }\
\
  return best;\
}\
\
\
/*************************************************************************/\
/*! gk_?argmax(): returns the logical index (0..n-1) of the largest of the\
    n strided elements of x; the first one wins on ties. incx must be\
    non-zero because the result is obtained by dividing by it. */\
/*************************************************************************/\
size_t PRFX ## argmax(size_t n, TYPE *x, size_t incx)\
{\
  size_t cnt, off, best;\
\
  best = 0;\
  off  = incx;\
  for (cnt=1; cnt<n; cnt++) {\
    if (x[off] > x[best])\
      best = off;\
    off += incx;\
  }\
\
  return (size_t)(best/incx);\
}\
\
\
/*************************************************************************/\
/*! gk_?argmin(): returns the logical index (0..n-1) of the smallest of the\
    n strided elements of x; the first one wins on ties. incx must be\
    non-zero because the result is obtained by dividing by it. */\
/*************************************************************************/\
size_t PRFX ## argmin(size_t n, TYPE *x, size_t incx)\
{\
  size_t cnt, off, best;\
\
  best = 0;\
  off  = incx;\
  for (cnt=1; cnt<n; cnt++) {\
    if (x[off] < x[best])\
      best = off;\
    off += incx;\
  }\
\
  return (size_t)(best/incx);\
}\
\
\
/*************************************************************************/\
/*! The macro for gk_?argmax_n()-class of routines.\
    Copies the n strided elements of x into a temporary array of\
    (key = element, val = logical index) pairs, sorts it with\
    PRFX ## kvsortd() and returns the val stored at position k-1, i.e., k\
    is 1-based. Presumably kvsortd() sorts by decreasing key, which makes\
    the result the index of the k-th largest element -- TODO confirm. */\
/*************************************************************************/\
size_t PRFX ## argmax_n(size_t n, TYPE *x, size_t incx, size_t k)\
{\
size_t i, j, max_n;\
PRFX ## kv_t *cand;\
\
cand = PRFX ## kvmalloc(n, "GK_ARGMAX_N: cand");\
\
for (i=0, j=0; i<n; i++, j+=incx) {\
cand[i].val = i;\
cand[i].key = x[j];\
}\
PRFX ## kvsortd(n, cand);\
\
/* NOTE(review): k is not validated; k == 0 or k > n indexes outside cand */\
max_n = cand[k-1].val;\
\
gk_free((void *)&cand, LTERM);\
\
return max_n;\
}\
\
\
/*************************************************************************/\
/*! gk_?sum(): returns the sum of the n elements x[0], x[incx], ...,\
    accumulated in OUTTYPE (0 when n is 0) */\
/**************************************************************************/\
OUTTYPE PRFX ## sum(size_t n, TYPE *x, size_t incx)\
{\
  OUTTYPE total = 0;\
  size_t left;\
\
  for (left=n; left>0; left--) {\
    total += (*x);\
    x += incx;\
  }\
\
  return total;\
}\
\
\
/*************************************************************************/\
/*! gk_?scale(): multiplies in place each of the n elements x[0], x[incx],\
    x[2*incx], ... by alpha and returns the start of the vector (the x that\
    was passed in), matching what gk_?axpy() and gk_?incset() return.\
    The walking pointer is kept separate so that the caller does not get\
    back a pointer positioned past the last scaled element. */\
/**************************************************************************/\
TYPE *PRFX ## scale(size_t n, TYPE alpha, TYPE *x, size_t incx)\
{\
  size_t i;\
  TYPE *x_in = x;\
\
  for (i=0; i<n; i++, x+=incx)\
    (*x) *= alpha;\
\
  return x_in;\
}\
\
\
/*************************************************************************/\
/*! gk_?norm2(): returns the square root of the sum of squares of the n\
    strided elements of x. The squares are accumulated in OUTTYPE and the\
    result is (OUTTYPE)0 unless that sum is positive. */\
/**************************************************************************/\
OUTTYPE PRFX ## norm2(size_t n, TYPE *x, size_t incx)\
{\
  OUTTYPE sumsq = 0;\
  size_t k;\
\
  for (k=0; k<n; k++) {\
    sumsq += (*x) * (*x);\
    x += incx;\
  }\
\
  if (sumsq > 0)\
    return (OUTTYPE)sqrt((double)sumsq);\
\
  return (OUTTYPE)0;\
}\
\
\
/*************************************************************************/\
/*! gk_?dot(): returns the sum over i of x[i*incx] * y[i*incy] for\
    i = 0..n-1, accumulated in OUTTYPE */\
/**************************************************************************/\
OUTTYPE PRFX ## dot(size_t n, TYPE *x, size_t incx, TYPE *y, size_t incy)\
{\
  OUTTYPE acc = 0.0;\
  size_t k = 0;\
\
  while (k < n) {\
    acc += (*x) * (*y);\
    x += incx;\
    y += incy;\
    k++;\
  }\
\
  return acc;\
}\
\
\
/*************************************************************************/\
/*! gk_?axpy(): performs y[i*incy] += alpha * x[i*incx] for i = 0..n-1 and\
    returns the y pointer that was passed in */\
/**************************************************************************/\
TYPE *PRFX ## axpy(size_t n, TYPE alpha, TYPE *x, size_t incx, TYPE *y, size_t incy)\
{\
  TYPE *dst = y;\
  size_t left;\
\
  for (left=n; left>0; left--) {\
    *dst += alpha*(*x);\
    x   += incx;\
    dst += incy;\
  }\
\
  return y;\
}\
#define GK_MKBLAS_PROTO(PRFX, TYPE, OUTTYPE) \
TYPE *PRFX ## incset(size_t n, TYPE baseval, TYPE *x);\
TYPE PRFX ## max(size_t n, TYPE *x, size_t incx);\
TYPE PRFX ## min(size_t n, TYPE *x, size_t incx);\
size_t PRFX ## argmax(size_t n, TYPE *x, size_t incx);\
size_t PRFX ## argmin(size_t n, TYPE *x, size_t incx);\
size_t PRFX ## argmax_n(size_t n, TYPE *x, size_t incx, size_t k);\
OUTTYPE PRFX ## sum(size_t n, TYPE *x, size_t incx);\
TYPE *PRFX ## scale(size_t n, TYPE alpha, TYPE *x, size_t incx);\
OUTTYPE PRFX ## norm2(size_t n, TYPE *x, size_t incx);\
OUTTYPE PRFX ## dot(size_t n, TYPE *x, size_t incx, TYPE *y, size_t incy);\
TYPE *PRFX ## axpy(size_t n, TYPE alpha, TYPE *x, size_t incx, TYPE *y, size_t incy);\
#endif

@ -0,0 +1,142 @@
/*!
\file gk_mkmemory.h
\brief Templates for memory allocation routines
\date Started 3/29/07
\author George
\version\verbatim $Id: gk_mkmemory.h 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
*/
#ifndef _GK_MKMEMORY_H_
#define _GK_MKMEMORY_H_
#define GK_MKALLOC(PRFX, TYPE)\
/*************************************************************************/\
/*! gk_?malloc(): requests room for n elements of TYPE from gk_malloc(),\
    forwarding msg unchanged, and returns the result cast to TYPE * */\
/**************************************************************************/\
TYPE *PRFX ## malloc(size_t n, char *msg)\
{\
  size_t nbytes = sizeof(TYPE)*n;\
\
  return (TYPE *)gk_malloc(nbytes, msg);\
}\
\
\
/*************************************************************************/\
/*! gk_?realloc(): resizes ptr to hold n elements of TYPE through\
    gk_realloc(), forwarding msg unchanged */\
/**************************************************************************/\
TYPE *PRFX ## realloc(TYPE *ptr, size_t n, char *msg)\
{\
  size_t nbytes = sizeof(TYPE)*n;\
\
  return (TYPE *)gk_realloc((void *)ptr, nbytes, msg);\
}\
\
\
/*************************************************************************/\
/*! gk_?smalloc(): allocates n elements of TYPE through gk_malloc() and\
    sets every one of them to ival with gk_?set(). Returns NULL when\
    gk_malloc() returned NULL. */\
/**************************************************************************/\
TYPE *PRFX ## smalloc(size_t n, TYPE ival, char *msg)\
{\
  TYPE *buf = (TYPE *)gk_malloc(sizeof(TYPE)*n, msg);\
\
  if (buf != NULL) \
    return PRFX ## set(n, ival, buf); \
\
  return NULL; \
}\
\
\
/*************************************************************************/\
/*! gk_?set(): assigns val to x[0..n-1] and returns x */\
/*************************************************************************/\
TYPE *PRFX ## set(size_t n, TYPE val, TYPE *x)\
{\
  size_t pos = 0;\
\
  while (pos < n) {\
    x[pos] = val;\
    pos++;\
  }\
\
  return x;\
}\
\
\
/*************************************************************************/\
/*! gk_?copy(): copies n elements of TYPE from a into b with memmove()\
    (so the two ranges may overlap) and returns b, the destination */\
/*************************************************************************/\
TYPE *PRFX ## copy(size_t n, TYPE *a, TYPE *b)\
{\
  size_t nbytes = sizeof(TYPE)*n;\
\
  memmove((void *)b, (void *)a, nbytes);\
\
  return b;\
}\
\
\
/*************************************************************************/\
/*! gk_?AllocMatrix(): allocates an array of ndim1 row pointers and, for\
    each row, ndim2 elements of TYPE initialized to value (via\
    gk_?smalloc()). errmsg is forwarded to the allocators.\
    Returns the matrix, or NULL if any allocation returned NULL. On a\
    failed row allocation, the rows obtained so far AND the row-pointer\
    array itself are released, so nothing is leaked. */\
/**************************************************************************/\
TYPE **PRFX ## AllocMatrix(size_t ndim1, size_t ndim2, TYPE value, char *errmsg)\
{\
  gk_idx_t i, j;\
  TYPE **matrix;\
\
  matrix = (TYPE **)gk_malloc(ndim1*sizeof(TYPE *), errmsg);\
  if (matrix == NULL) \
    return NULL;\
\
  for (i=0; i<ndim1; i++) { \
    matrix[i] = PRFX ## smalloc(ndim2, value, errmsg);\
    if (matrix[i] == NULL) { \
      /* undo the partial allocation: rows first, then the pointer array */\
      for (j=0; j<i; j++) \
        gk_free((void **)&matrix[j], LTERM); \
      gk_free((void **)&matrix, LTERM); \
      return NULL; \
    } \
  }\
\
  return matrix;\
}\
\
\
/*************************************************************************/\
/*! gk_?FreeMatrix(): releases the ndim1 rows of *r_matrix and then the\
    row-pointer array itself, all through gk_free(). Does nothing when\
    *r_matrix is NULL. ndim2 is not used. */\
/**************************************************************************/\
void PRFX ## FreeMatrix(TYPE ***r_matrix, size_t ndim1, size_t ndim2)\
{\
  TYPE **rows = *r_matrix;\
  gk_idx_t r;\
\
  if (rows != NULL) { \
    for (r=0; r<ndim1; r++) \
      gk_free((void **)&(rows[r]), LTERM);\
\
    gk_free((void **)r_matrix, LTERM);\
  }\
}\
\
\
/*************************************************************************/\
/*! gk_?SetMatrix(): assigns value to every matrix[r][c] with\
    0 <= r < ndim1 and 0 <= c < ndim2 */\
/**************************************************************************/\
void PRFX ## SetMatrix(TYPE **matrix, size_t ndim1, size_t ndim2, TYPE value)\
{\
  gk_idx_t r, c;\
\
  r = 0;\
  while (r<ndim1) {\
    c = 0;\
    while (c<ndim2) {\
      matrix[r][c] = value;\
      c++;\
    }\
    r++;\
  }\
}\
#define GK_MKALLOC_PROTO(PRFX, TYPE)\
TYPE *PRFX ## malloc(size_t n, char *msg);\
TYPE *PRFX ## realloc(TYPE *ptr, size_t n, char *msg);\
TYPE *PRFX ## smalloc(size_t n, TYPE ival, char *msg);\
TYPE *PRFX ## set(size_t n, TYPE val, TYPE *x);\
TYPE *PRFX ## copy(size_t n, TYPE *a, TYPE *b);\
TYPE **PRFX ## AllocMatrix(size_t ndim1, size_t ndim2, TYPE value, char *errmsg);\
void PRFX ## FreeMatrix(TYPE ***r_matrix, size_t ndim1, size_t ndim2);\
void PRFX ## SetMatrix(TYPE **matrix, size_t ndim1, size_t ndim2, TYPE value);\
#endif

@ -0,0 +1,440 @@
/*!
\file gk_mkpqueue.h
\brief Templates for priority queues
\date Started 4/09/07
\author George
\version\verbatim $Id: gk_mkpqueue.h 21742 2018-01-26 16:59:15Z karypis $ \endverbatim
*/
#ifndef _GK_MKPQUEUE_H
#define _GK_MKPQUEUE_H
#define GK_MKPQUEUE(FPRFX, PQT, KVT, KT, VT, KVMALLOC, KMAX, KEY_LT)\
/*************************************************************************/\
/*! Allocates a PQT through gk_malloc(), initializes it for maxnodes items\
    with the Init routine and returns it */\
/**************************************************************************/\
PQT *FPRFX ## Create(size_t maxnodes)\
{\
  PQT *pq = (PQT *)gk_malloc(sizeof(PQT), "gk_pqCreate: queue");\
\
  FPRFX ## Init(pq, maxnodes);\
\
  return pq;\
}\
\
\
/*************************************************************************/\
/*! Sets up an empty queue for up to maxnodes items: records the capacity,\
    allocates the heap array with KVMALLOC and allocates the locator array\
    with every entry set to -1 */\
/**************************************************************************/\
void FPRFX ## Init(PQT *queue, size_t maxnodes)\
{\
  /* bookkeeping first, then the two arrays (heap before locator) */\
  queue->maxnodes = maxnodes;\
  queue->nnodes   = 0;\
\
  queue->heap    = KVMALLOC(maxnodes, "gk_PQInit: heap");\
  queue->locator = gk_idxsmalloc(maxnodes, -1, "gk_PQInit: locator");\
}\
\
\
/*************************************************************************/\
/*! Empties the queue: the locator entry of every item currently in the\
    heap is set back to -1 and the item count becomes 0. The arrays are\
    kept. */\
/**************************************************************************/\
void FPRFX ## Reset(PQT *queue)\
{\
  KVT *heap=queue->heap;\
  ssize_t *locator=queue->locator;\
  ssize_t pos=queue->nnodes;\
\
  while (pos > 0) {\
    pos--;\
    locator[heap[pos].val] = -1;\
  }\
\
  queue->nnodes = 0;\
}\
\
\
/*************************************************************************/\
/*! Releases the heap and locator arrays of the queue through gk_free() and\
    sets its capacity to 0. The PQT object itself is not released. A NULL\
    queue is ignored. */\
/**************************************************************************/\
void FPRFX ## Free(PQT *queue)\
{\
  if (queue != NULL) {\
    gk_free((void **)&queue->heap, &queue->locator, LTERM);\
    queue->maxnodes = 0;\
  }\
}\
\
\
/*************************************************************************/\
/*! Releases the internal arrays of the queue (via the Free routine) and\
    then the PQT object itself. A NULL queue is ignored. */\
/**************************************************************************/\
void FPRFX ## Destroy(PQT *queue)\
{\
  if (queue != NULL) {\
    FPRFX ## Free(queue);\
    gk_free((void **)&queue, LTERM);\
  }\
}\
\
\
/*************************************************************************/\
/*! Returns the number of items currently stored in the queue */\
/**************************************************************************/\
size_t FPRFX ## Length(PQT *queue)\
{\
  size_t count = queue->nnodes;\
\
  return count;\
}\
\
\
/*************************************************************************/\
/*! This function adds an item in the priority queue.\
    node must not already be in the queue (its locator entry must be -1).\
    The heap is ordered by KEY_LT: a parent's key is never KEY_LT-after its\
    children's, so the item that is first under KEY_LT sits at heap[0].\
    Always returns 0.\
    NOTE(review): there is no check that nnodes < maxnodes before the item\
    is appended -- confirm callers never insert into a full queue. */\
/**************************************************************************/\
int FPRFX ## Insert(PQT *queue, VT node, KT key)\
{\
ssize_t i, j;\
ssize_t *locator=queue->locator;\
KVT *heap=queue->heap;\
\
ASSERT2(FPRFX ## CheckHeap(queue));\
\
ASSERT(locator[node] == -1);\
\
/* start at the new last slot and move up (filter-up): while the parent's */\
/* key comes after the new key, pull the parent down into slot i */\
i = queue->nnodes++;\
while (i > 0) {\
/* parent of slot i */\
j = (i-1)>>1;\
if (KEY_LT(key, heap[j].key)) {\
heap[i] = heap[j];\
locator[heap[i].val] = i;\
i = j;\
}\
else\
break;\
}\
ASSERT(i >= 0);\
/* i is now the final slot of the new item */\
heap[i].key = key;\
heap[i].val = node;\
locator[node] = i;\
\
ASSERT2(FPRFX ## CheckHeap(queue));\
\
return 0;\
}\
\
\
/*************************************************************************/\
/*! This function deletes an item from the priority queue.\
    node must currently be in the queue. The last heap entry is moved into\
    the vacated slot and then filtered up or down, depending on how its key\
    compares with the key of the removed item. Always returns 0. */\
/**************************************************************************/\
int FPRFX ## Delete(PQT *queue, VT node)\
{\
ssize_t i, j;\
size_t nnodes;\
KT newkey, oldkey;\
ssize_t *locator=queue->locator;\
KVT *heap=queue->heap;\
\
ASSERT(locator[node] != -1);\
ASSERT(heap[locator[node]].val == node);\
\
ASSERT2(FPRFX ## CheckHeap(queue));\
\
/* i is the slot being vacated */\
i = locator[node];\
locator[node] = -1;\
\
/* nothing to repair if the queue became empty or the removed item was the */\
/* last heap entry; otherwise re-place the last entry starting from slot i. */\
/* From here on, node/newkey describe that last entry. */\
if (--queue->nnodes > 0 && heap[queue->nnodes].val != node) {\
node = heap[queue->nnodes].val;\
newkey = heap[queue->nnodes].key;\
oldkey = heap[i].key;\
\
if (KEY_LT(newkey, oldkey)) { /* Filter-up */\
while (i > 0) {\
j = (i-1)>>1;\
if (KEY_LT(newkey, heap[j].key)) {\
heap[i] = heap[j];\
locator[heap[i].val] = i;\
i = j;\
}\
else\
break;\
}\
}\
else { /* Filter down */\
nnodes = queue->nnodes;\
/* j is the left child of i; j+1 (when present) is the right child. */\
/* The child that comes first under KEY_LT is promoted into slot i. */\
while ((j=(i<<1)+1) < nnodes) {\
if (KEY_LT(heap[j].key, newkey)) {\
if (j+1 < nnodes && KEY_LT(heap[j+1].key, heap[j].key))\
j++;\
heap[i] = heap[j];\
locator[heap[i].val] = i;\
i = j;\
}\
else if (j+1 < nnodes && KEY_LT(heap[j+1].key, newkey)) {\
j++;\
heap[i] = heap[j];\
locator[heap[i].val] = i;\
i = j;\
}\
else\
break;\
}\
}\
\
heap[i].key = newkey;\
heap[i].val = node;\
locator[node] = i;\
}\
\
ASSERT2(FPRFX ## CheckHeap(queue));\
\
return 0;\
}\
\
\
/*************************************************************************/\
/*! This function changes the key of an item that is already in the queue\
    to newkey and restores the heap order by filtering the item up or down.\
    node must currently be in the queue. If newkey and the current key are\
    equivalent under KEY_LT (neither precedes the other) nothing is done.\
    The membership assertions run before locator[node] is used as a heap\
    index, so in assertion-enabled builds a node that is not in the queue\
    is reported instead of being read through index -1. */ \
/**************************************************************************/\
void FPRFX ## Update(PQT *queue, VT node, KT newkey)\
{\
  ssize_t i, j;\
  size_t nnodes;\
  KT oldkey;\
  ssize_t *locator=queue->locator;\
  KVT *heap=queue->heap;\
\
  /* validate first: locator[node] indexes heap[] right below */\
  ASSERT(locator[node] != -1);\
  ASSERT(heap[locator[node]].val == node);\
\
  oldkey = heap[locator[node]].key;\
  if (!KEY_LT(newkey, oldkey) && !KEY_LT(oldkey, newkey)) return;\
\
  ASSERT2(FPRFX ## CheckHeap(queue));\
\
  i = locator[node];\
\
  if (KEY_LT(newkey, oldkey)) { /* Filter-up */\
    while (i > 0) {\
      j = (i-1)>>1;\
      if (KEY_LT(newkey, heap[j].key)) {\
        heap[i] = heap[j];\
        locator[heap[i].val] = i;\
        i = j;\
      }\
      else\
        break;\
    }\
  }\
  else { /* Filter down */\
    nnodes = queue->nnodes;\
    /* j is the left child of i; j+1 (when present) is the right child */\
    while ((j=(i<<1)+1) < nnodes) {\
      if (KEY_LT(heap[j].key, newkey)) {\
        if (j+1 < nnodes && KEY_LT(heap[j+1].key, heap[j].key))\
          j++;\
        heap[i] = heap[j];\
        locator[heap[i].val] = i;\
        i = j;\
      }\
      else if (j+1 < nnodes && KEY_LT(heap[j+1].key, newkey)) {\
        j++;\
        heap[i] = heap[j];\
        locator[heap[i].val] = i;\
        i = j;\
      }\
      else\
        break;\
    }\
  }\
\
  /* i is the final slot of the updated item */\
  heap[i].key = newkey;\
  heap[i].val = node;\
  locator[node] = i;\
\
  ASSERT2(FPRFX ## CheckHeap(queue));\
\
  return;\
}\
\
\
/*************************************************************************/\
/*! This function returns the item at the top of the queue (heap[0]) and\
removes it from the priority queue. Returns -1 when the queue is empty.\
After the removal, the last heap entry is re-placed by filtering it down\
from the root. */\
/**************************************************************************/\
VT FPRFX ## GetTop(PQT *queue)\
{\
ssize_t i, j;\
ssize_t *locator;\
KVT *heap;\
VT vtx, node;\
KT key;\
\
ASSERT2(FPRFX ## CheckHeap(queue));\
\
if (queue->nnodes == 0)\
return -1;\
\
queue->nnodes--;\
\
heap = queue->heap;\
locator = queue->locator;\
\
/* vtx is the item being returned; mark it as no longer in the queue */\
vtx = heap[0].val;\
locator[vtx] = -1;\
\
/* if items remain, take the last heap entry (key, node) and find its slot */\
/* by walking down from the root, promoting the child that comes first */\
/* under KEY_LT at each level */\
if ((i = queue->nnodes) > 0) {\
key = heap[i].key;\
node = heap[i].val;\
i = 0;\
while ((j=2*i+1) < queue->nnodes) {\
if (KEY_LT(heap[j].key, key)) {\
if (j+1 < queue->nnodes && KEY_LT(heap[j+1].key, heap[j].key))\
j = j+1;\
heap[i] = heap[j];\
locator[heap[i].val] = i;\
i = j;\
}\
else if (j+1 < queue->nnodes && KEY_LT(heap[j+1].key, key)) {\
j = j+1;\
heap[i] = heap[j];\
locator[heap[i].val] = i;\
i = j;\
}\
else\
break;\
}\
\
heap[i].key = key;\
heap[i].val = node;\
locator[node] = i;\
}\
\
ASSERT2(FPRFX ## CheckHeap(queue));\
return vtx;\
}\
\
\
/*************************************************************************/\
/*! Returns the item stored at the top of the queue (heap[0]) without\
    removing it, or -1 when the queue is empty */\
/**************************************************************************/\
VT FPRFX ## SeeTopVal(PQT *queue)\
{\
  if (queue->nnodes == 0)\
    return -1;\
\
  return queue->heap[0].val;\
}\
\
\
/*************************************************************************/\
/*! Returns the key of the item at the top of the queue (heap[0]) without\
    removing it, or KMAX when the queue is empty */\
/**************************************************************************/\
KT FPRFX ## SeeTopKey(PQT *queue)\
{\
  if (queue->nnodes == 0)\
    return KMAX;\
\
  return queue->heap[0].key;\
}\
\
\
/*************************************************************************/\
/*! Returns the key currently associated with node, looked up through the\
    locator array. No check is made that node is in the queue. */\
/**************************************************************************/\
KT FPRFX ## SeeKey(PQT *queue, VT node)\
{\
  return queue->heap[queue->locator[node]].key;\
}\
\
\
/*************************************************************************/\
/*! This function returns the first item in a breadth-first traversal of\
the heap whose key is less than maxwgt. This function is here due to\
hMETIS and is not general!*/\
/**************************************************************************/\
/*\
VT FPRFX ## SeeConstraintTop(PQT *queue, KT maxwgt, KT *wgts)\
{\
ssize_t i;\
\
if (queue->nnodes == 0)\
return -1;\
\
if (maxwgt <= 1000)\
return FPRFX ## SeeTopVal(queue);\
\
for (i=0; i<queue->nnodes; i++) {\
if (queue->heap[i].key > 0) {\
if (wgts[queue->heap[i].val] <= maxwgt)\
return queue->heap[i].val;\
}\
else {\
if (queue->heap[i/2].key <= 0)\
break;\
}\
}\
\
return queue->heap[0].val;\
\
}\
*/\
\
\
/*************************************************************************/\
/*! This functions checks the consistency of the heap.\
    Every check is an ASSERT/ASSERTP, so violations are only detected when\
    those macros are active; the function itself always returns 1.\
    Verified: (a) locator[] maps each heap item back to its slot, (b) no\
    item's key precedes (KEY_LT) its parent's key, (c) no item's key\
    precedes the root's key, and (d) the number of locator entries that\
    are not -1 equals nnodes. */\
/**************************************************************************/\
int FPRFX ## CheckHeap(PQT *queue)\
{\
ssize_t i, j;\
size_t nnodes;\
ssize_t *locator;\
KVT *heap;\
\
heap = queue->heap;\
locator = queue->locator;\
nnodes = queue->nnodes;\
\
if (nnodes == 0)\
return 1;\
\
ASSERT(locator[heap[0].val] == 0);\
for (i=1; i<nnodes; i++) {\
ASSERT(locator[heap[i].val] == i);\
ASSERT(!KEY_LT(heap[i].key, heap[(i-1)/2].key));\
}\
for (i=1; i<nnodes; i++)\
ASSERT(!KEY_LT(heap[i].key, heap[0].key));\
\
/* j counts the locator slots that are in use */\
for (j=i=0; i<queue->maxnodes; i++) {\
if (locator[i] != -1)\
j++;\
}\
ASSERTP(j == nnodes, ("%jd %jd\n", (intmax_t)j, (intmax_t)nnodes));\
\
return 1;\
}\
#define GK_MKPQUEUE_PROTO(FPRFX, PQT, KT, VT)\
PQT * FPRFX ## Create(size_t maxnodes);\
void FPRFX ## Init(PQT *queue, size_t maxnodes);\
void FPRFX ## Reset(PQT *queue);\
void FPRFX ## Free(PQT *queue);\
void FPRFX ## Destroy(PQT *queue);\
size_t FPRFX ## Length(PQT *queue);\
int FPRFX ## Insert(PQT *queue, VT node, KT key);\
int FPRFX ## Delete(PQT *queue, VT node);\
void FPRFX ## Update(PQT *queue, VT node, KT newkey);\
VT FPRFX ## GetTop(PQT *queue);\
VT FPRFX ## SeeTopVal(PQT *queue);\
KT FPRFX ## SeeTopKey(PQT *queue);\
KT FPRFX ## SeeKey(PQT *queue, VT node);\
VT FPRFX ## SeeConstraintTop(PQT *queue, KT maxwgt, KT *wgts);\
int FPRFX ## CheckHeap(PQT *queue);\
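/* This is how these macros are used. GK_MKPQUEUE takes eight arguments
(FPRFX, PQT, KVT, KT, VT, KVMALLOC, KMAX, KEY_LT); KVT is the key/value
pair type stored in the heap and KEY_LT is the key-ordering macro:
GK_MKPQUEUE(gk_dkvPQ, gk_dkvPQ_t, KVT, double, gk_idx_t, gk_dkvmalloc, DBL_MAX, KEY_LT)
GK_MKPQUEUE_PROTO(gk_dkvPQ, gk_dkvPQ_t, double, gk_idx_t)
*/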
#endif

@ -0,0 +1,215 @@
/*!
\file gk_mkpqueue2.h
\brief Templates for priority queues that do not utilize locators and as such
they can use different types of values.
\date Started 4/09/07
\author George
\version\verbatim $Id: gk_mkpqueue2.h 13005 2012-10-23 22:34:36Z karypis $ \endverbatim
*/
#ifndef _GK_MKPQUEUE2_H
#define _GK_MKPQUEUE2_H
#define GK_MKPQUEUE2(FPRFX, PQT, KT, VT, KMALLOC, VMALLOC, KMAX, KEY_LT)\
/*************************************************************************/\
/*! Allocates an empty priority queue with room for maxnodes items.\
    \param maxnodes is the capacity handed to KMALLOC/VMALLOC when sizing\
           the keys[] and vals[] arrays.\
    \returns the new queue. If either array could not be allocated, the\
             arrays and the queue are passed to gk_free() and the queue\
             pointer is returned as gk_free() left it (presumably NULL --\
             TODO confirm against gk_free()).\
*/\
/**************************************************************************/\
PQT *FPRFX ## Create2(ssize_t maxnodes)\
{\
  PQT *queue = (PQT *)gk_malloc(sizeof(PQT), "gk_pqCreate2: queue");\
\
  if (queue == NULL)\
    return NULL;\
\
  memset(queue, 0, sizeof(PQT));\
  queue->nnodes   = 0;\
  queue->maxnodes = maxnodes;\
  queue->keys     = KMALLOC(maxnodes, "gk_pqCreate2: keys");\
  queue->vals     = VMALLOC(maxnodes, "gk_pqCreate2: vals");\
\
  if (queue->keys == NULL || queue->vals == NULL)\
    gk_free((void **)&queue->keys, &queue->vals, &queue, LTERM);\
\
  return queue;\
}\
\
\
/*************************************************************************/\
/*! This function resets the priority queue: the item count is set back to\
    zero so the queue reads as empty. The keys/vals arrays and maxnodes are\
    not touched, so the queue can be refilled without reallocating. */\
/**************************************************************************/\
void FPRFX ## Reset2(PQT *queue)\
{\
queue->nnodes = 0;\
}\
\
\
/*************************************************************************/\
/*! Releases a priority queue: its keys[] and vals[] arrays and the queue\
    structure itself are handed to gk_free().\
    \param r_queue points to the caller's queue pointer; it is set to NULL\
           once the queue has been released. A queue pointer that is\
           already NULL is left alone.\
*/\
/**************************************************************************/\
void FPRFX ## Destroy2(PQT **r_queue)\
{\
  PQT *queue = *r_queue;\
\
  if (queue != NULL) {\
    gk_free((void **)&queue->keys, &queue->vals, &queue, LTERM);\
    *r_queue = NULL;\
  }\
}\
\
\
/*************************************************************************/\
/*! Reports how many items are currently stored in the queue.\
    \returns the queue's nnodes counter. */\
/**************************************************************************/\
size_t FPRFX ## Length2(PQT *queue)\
{\
  size_t nitems = queue->nnodes;\
\
  return nitems;\
}\
\
\
/*************************************************************************/\
/*! This function adds an item in the priority queue.\
    \param queue is the queue to insert into.\
    \param val   is the value stored alongside the key.\
    \param key   is the key that orders the item (compared with KEY_LT).\
    \returns 1 if the item was added, 0 if the queue already holds\
             maxnodes items (the queue is left unchanged in that case).\
*/\
/**************************************************************************/\
int FPRFX ## Insert2(PQT *queue, VT val, KT key)\
{\
  ssize_t i, j;\
  KT *keys=queue->keys;\
  VT *vals=queue->vals;\
\
  /* one consistency check on entry is enough: nothing modifies the heap\
     between here and the sift-up below */\
  ASSERT2(FPRFX ## CheckHeap2(queue));\
\
  if (queue->nnodes == queue->maxnodes) \
    return 0;\
\
  /* open a hole at the end and move it up while the new key compares\
     before the key of the hole's parent */\
  i = queue->nnodes++;\
  while (i > 0) {\
    j = (i-1)>>1;\
    if (!KEY_LT(key, keys[j]))\
      break;\
    keys[i] = keys[j];\
    vals[i] = vals[j];\
    i = j;\
  }\
  ASSERT(i >= 0);\
  keys[i] = key;\
  vals[i] = val;\
\
  ASSERT2(FPRFX ## CheckHeap2(queue));\
\
  return 1;\
}\
\
\
/*************************************************************************/\
/*! Removes the item at the top of the queue and hands its value back.\
    \param queue is the queue to pop from.\
    \param r_val receives the value of the removed item.\
    \returns 1 if an item was removed, 0 if the queue was empty (in which\
             case *r_val is not written).\
*/\
/**************************************************************************/\
int FPRFX ## GetTop2(PQT *queue, VT *r_val)\
{\
  ssize_t pos, child;\
  KT lastkey, *keys=queue->keys;\
  VT lastval, *vals=queue->vals;\
\
  ASSERT2(FPRFX ## CheckHeap2(queue));\
\
  if (queue->nnodes == 0)\
    return 0;\
\
  queue->nnodes--;\
\
  *r_val = vals[0];\
\
  pos = queue->nnodes;\
  if (pos > 0) {\
    /* take the last item and sift it down from the root */\
    lastkey = keys[pos];\
    lastval = vals[pos];\
\
    for (pos=0; (child=2*pos+1) < queue->nnodes; pos=child) {\
      if (KEY_LT(keys[child], lastkey)) {\
        /* left child goes up unless the right one compares before it */\
        if (child+1 < queue->nnodes && KEY_LT(keys[child+1], keys[child]))\
          child = child+1;\
      }\
      else if (child+1 < queue->nnodes && KEY_LT(keys[child+1], lastkey)) {\
        child = child+1;\
      }\
      else {\
        /* neither child compares before the item: final position found */\
        break;\
      }\
\
      keys[pos] = keys[child];\
      vals[pos] = vals[child];\
    }\
\
    keys[pos] = lastkey;\
    vals[pos] = lastval;\
  }\
\
  ASSERT2(FPRFX ## CheckHeap2(queue));\
\
  return 1;\
}\
\
\
/*************************************************************************/\
/*! Peeks at the value of the top item without removing it.\
    \param r_val receives the value stored at the top of the queue.\
    \returns 1 if *r_val was written, 0 if the queue is empty. */\
/**************************************************************************/\
int FPRFX ## SeeTopVal2(PQT *queue, VT *r_val)\
{\
  int have_top = (queue->nnodes != 0);\
\
  if (have_top)\
    *r_val = queue->vals[0];\
\
  return have_top;\
}\
\
\
/*************************************************************************/\
/*! Peeks at the key of the top item without removing it.\
    \returns the key stored at the top of the queue, or KMAX when the\
             queue is empty. */\
/**************************************************************************/\
KT FPRFX ## SeeTopKey2(PQT *queue)\
{\
  const int is_empty = (queue->nnodes == 0);\
\
  return (is_empty ? KMAX : queue->keys[0]);\
}\
\
\
/*************************************************************************/\
/*! This functions checks the consistency of the heap.\
    Every violation is reported through ASSERT(); the function itself\
    always returns 1 so that it can be wrapped in ASSERT2().\
    This is the last routine of GK_MKPQUEUE2: its closing brace carries no\
    line continuation, so the macro definition ends here.\
*/\
/**************************************************************************/\
int FPRFX ## CheckHeap2(PQT *queue)\
{\
  ssize_t i;\
  KT *keys=queue->keys;\
\
  if (queue->nnodes == 0)\
    return 1;\
\
  /* no item may compare before its parent */\
  for (i=1; i<queue->nnodes; i++) {\
    ASSERT(!KEY_LT(keys[i], keys[(i-1)/2]));\
  }\
  /* direct cross-check: no item may compare before the root */\
  for (i=1; i<queue->nnodes; i++)\
    ASSERT(!KEY_LT(keys[i], keys[0]));\
\
  return 1;\
}
/*************************************************************************/
/*! Emits the prototypes of the routines generated by GK_MKPQUEUE2().
    \param FPRFX is the prefix pasted in front of every routine name.
    \param PQT   is the priority-queue type the routines operate on.
    \param KT    is the key type.
    \param VT    is the value type.
    The last line deliberately carries no line continuation, so the macro
    cannot absorb the line that follows it.
*/
/**************************************************************************/
#define GK_MKPQUEUE2_PROTO(FPRFX, PQT, KT, VT)\
  PQT *  FPRFX ## Create2(ssize_t maxnodes);\
  void   FPRFX ## Reset2(PQT *queue);\
  void   FPRFX ## Destroy2(PQT **r_queue);\
  size_t FPRFX ## Length2(PQT *queue);\
  int    FPRFX ## Insert2(PQT *queue, VT val, KT key);\
  int    FPRFX ## GetTop2(PQT *queue, VT *r_val);\
  int    FPRFX ## SeeTopVal2(PQT *queue, VT *r_val);\
  KT     FPRFX ## SeeTopKey2(PQT *queue);\
  int    FPRFX ## CheckHeap2(PQT *queue);
#endif

@ -0,0 +1,123 @@
/*!
\file
\brief Templates for portable random number generation
\date Started 5/17/07
\author George
\version\verbatim $Id: gk_mkrandom.h 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
*/
#ifndef _GK_MKRANDOM_H
#define _GK_MKRANDOM_H
/*************************************************************************/
/*! The generator for the rand() related routines.
\param RNGT the datatype that defines the range of values over which
random numbers will be generated
\param VALT the datatype that defines the contents of the array to
be permuted by randArrayPermute()
\param FPRFX the function prefix
*/
/**************************************************************************/
#define GK_MKRANDOM(FPRFX, RNGT, VALT)\
/*************************************************************************/\
/*! Seeds the generator.\
    \param seed is converted to uint64_t and passed on to gk_randinit(). */\
/**************************************************************************/\
void FPRFX ## srand(RNGT seed) \
{\
  uint64_t seed64 = (uint64_t) seed;\
\
  gk_randinit(seed64);\
}\
\
\
/*************************************************************************/\
/*! Returns a random number.\
    The value comes from gk_randint32() when RNGT is no wider than int32_t\
    and from gk_randint64() otherwise; either way it is cast to RNGT.\
    The parameter list is spelled (void) so that the definition is a real\
    prototype rather than an unprototyped declarator. */\
/**************************************************************************/\
RNGT FPRFX ## rand(void) \
{\
  if (sizeof(RNGT) <= sizeof(int32_t)) \
    return (RNGT)gk_randint32(); \
  else \
    return (RNGT)gk_randint64(); \
}\
\
\
/*************************************************************************/\
/*! Returns a random number between [0, max).\
    \param max is the exclusive upper bound of the range.\
    \returns rand() % max. The range is empty when max is 0; instead of\
             executing a remainder by zero (undefined behaviour) the\
             function then returns 0 without drawing a random number. */\
/**************************************************************************/\
RNGT FPRFX ## randInRange(RNGT max) \
{\
  if (max == 0) \
    return (RNGT)0; \
\
  return (RNGT)((FPRFX ## rand())%max); \
}\
\
\
/*************************************************************************/\
/*! Randomly permutes the elements of an array p[].\
    \param n         is the number of elements in p[].\
    \param p         is the array that is shuffled in place.\
    \param nshuffles is the number of shuffle rounds performed when n >= 10;\
                     arrays shorter than 10 always get n single swaps.\
    \param flag      1: p[i] is set to i before shuffling,\
                     0: p[] is shuffled as supplied by the caller.\
*/\
/**************************************************************************/\
void FPRFX ## randArrayPermute(RNGT n, VALT *p, RNGT nshuffles, int flag)\
{\
  RNGT iter, a, b;\
  VALT swp;\
\
  if (flag == 1) {\
    for (iter=0; iter<n; iter++)\
      p[iter] = (VALT)iter;\
  }\
\
  if (n >= 10) {\
    /* each round exchanges two windows of four consecutive entries; the\
       start positions are drawn from [0, n-3) so that +3 stays inside p[],\
       and the windows are paired crosswise (0-2, 1-3, 2-0, 3-1) */\
    for (iter=0; iter<nshuffles; iter++) {\
      a = FPRFX ## randInRange(n-3);\
      b = FPRFX ## randInRange(n-3);\
      gk_SWAP(p[a+0], p[b+2], swp);\
      gk_SWAP(p[a+1], p[b+3], swp);\
      gk_SWAP(p[a+2], p[b+0], swp);\
      gk_SWAP(p[a+3], p[b+1], swp);\
    }\
    return;\
  }\
\
  /* short arrays: n swaps of two randomly chosen entries */\
  for (iter=0; iter<n; iter++) {\
    a = FPRFX ## randInRange(n);\
    b = FPRFX ## randInRange(n);\
    gk_SWAP(p[a], p[b], swp);\
  }\
}\
\
\
/*************************************************************************/\
/*! Randomly permutes the elements of an array p[], one swap per element.\
    \param n    is the number of elements in p[].\
    \param p    is the array that is shuffled in place.\
    \param flag 1: p[i] is set to i before shuffling,\
                0: p[] is shuffled as supplied by the caller.\
    NOTE(review): the swap partner is drawn from the full range [0, n) at\
    every step, which is not the Fisher-Yates scheme; it is kept as is\
    because changing it would alter the permutation produced for a seed.\
    This is the last routine of GK_MKRANDOM: its closing brace carries no\
    line continuation, so the macro definition ends here.\
*/\
/**************************************************************************/\
void FPRFX ## randArrayPermuteFine(RNGT n, VALT *p, int flag)\
{\
  RNGT i, v;\
  VALT tmp;\
\
  if (flag == 1) {\
    for (i=0; i<n; i++)\
      p[i] = (VALT)i;\
  }\
\
  for (i=0; i<n; i++) {\
    v = FPRFX ## randInRange(n);\
    gk_SWAP(p[i], p[v], tmp);\
  }\
}
/*************************************************************************/
/*! Emits the prototypes of the routines generated by GK_MKRANDOM().
    \param FPRFX is the prefix pasted in front of every routine name.
    \param RNGT  is the datatype of the generated random numbers.
    \param VALT  is the element type of the arrays that get permuted.
    rand is declared with (void) so that calls are checked against a real
    prototype, and the last line carries no line continuation so the macro
    cannot absorb the line that follows it.
*/
/**************************************************************************/
#define GK_MKRANDOM_PROTO(FPRFX, RNGT, VALT)\
  void FPRFX ## srand(RNGT seed); \
  RNGT FPRFX ## rand(void); \
  RNGT FPRFX ## randInRange(RNGT max); \
  void FPRFX ## randArrayPermute(RNGT n, VALT *p, RNGT nshuffles, int flag);\
  void FPRFX ## randArrayPermuteFine(RNGT n, VALT *p, int flag);
#endif

@ -0,0 +1,271 @@
/*!
\file gk_mksort.h
\brief Templates for the qsort routine
\date Started 3/28/07
\author George
\version\verbatim $Id: gk_mksort.h 21051 2017-05-25 04:36:14Z karypis $ \endverbatim
*/
#ifndef _GK_MKSORT_H_
#define _GK_MKSORT_H_
/* Adopted from GNU glibc by Mjt.
* See stdlib/qsort.c in glibc */
/* Copyright (C) 1991, 1992, 1996, 1997, 1999 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Written by Douglas C. Schmidt (schmidt@ics.uci.edu).
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
/* In-line qsort implementation. It differs from the traditional qsort()
* routine in that it is a macro, not a function: instead of passing the
* address of a comparison routine to a function, the comparison routine is
* inlined into the sorting code, which speeds up sorting a lot.
*
* Usage:
* #include "gk_mksort.h"
* #define islt(a,b) (strcmp((*a),(*b))<0)
* char *arr[];
* int n;
* GK_MKQSORT(char*, arr, n, islt);
*
* The "prototype" and 4 arguments are:
* GK_MKQSORT(TYPE,BASE,NELT,ISLT)
* 1) type of each element, TYPE,
* 2) address of the beginning of the array, of type TYPE*,
* 3) number of elements in the array, and
* 4) comparison routine.
* Array pointer and number of elements are referenced only once.
* This is similar to a call
* qsort(BASE,NELT,sizeof(TYPE),ISLT)
* with the difference in the last parameter.
* Note the islt macro/routine (it receives pointers to two elements):
* the only condition of interest is whether one element is less than
* another; no other conditions (greater than, equal to, etc.) are tested.
* So, for example, to define integer sort, use:
* #define islt(a,b) ((*a)<(*b))
* GK_MKQSORT(int, arr, n, islt)
*
* The macro could be used to implement a sorting function (see examples
* below), or to implement the sorting algorithm inline. That is, either
* create a sorting function and use it whenever you want to sort something,
* or use the GK_MKQSORT() macro directly instead of a call to such a routine.
* Note that the macro expands to quite some code (compiled size of int qsort
* on x86 is about 700..800 bytes).
*
* Using this macro directly it isn't possible to implement traditional
* qsort() routine, because the macro assumes sizeof(element) == sizeof(TYPE),
* while qsort() allows element size to be different.
*
* Several ready-to-use examples:
*
* Sorting array of integers:
* void int_qsort(int *arr, unsigned n) {
* #define int_lt(a,b) ((*a)<(*b))
* GK_MKQSORT(int, arr, n, int_lt);
* }
*
* Sorting array of string pointers:
* void str_qsort(char *arr[], unsigned n) {
* #define str_lt(a,b) (strcmp((*a),(*b)) < 0)
* GK_MKQSORT(char*, arr, n, str_lt);
* }
*
* Sorting array of structures:
*
* struct elt {
* int key;
* ...
* };
* void elt_qsort(struct elt *arr, unsigned n) {
* #define elt_lt(a,b) ((a)->key < (b)->key)
* GK_MKQSORT(struct elt, arr, n, elt_lt);
* }
*
* And so on.
*/
/* Swap two items pointed to by A and B using temporary buffer t.
   Every argument is parenthesized so that pointer expressions such as
   `base + 1` can be passed, not only plain identifiers. */
#define _GKQSORT_SWAP(a, b, t) ((void)(((t) = *(a)), (*(a) = *(b)), (*(b) = (t))))
/* Discontinue quicksort algorithm when partition gets below this size. */
#define _GKQSORT_MAX_THRESH 8
/* The next 4 #defines implement a very fast in-line stack abstraction.
   A stack entry is a struct with `_lo` and `_hi` members; `top` points one
   past the most recently pushed entry. _GKQSORT_STACK_NOT_EMPTY refers to
   the variables `_stack` and `_top` of the scope it is expanded in. */
#define _GKQSORT_STACK_SIZE (8 * sizeof(size_t))
#define _GKQSORT_PUSH(top, low, high) ((((top)->_lo = (low)), ((top)->_hi = (high)), ++(top)))
#define _GKQSORT_POP(low, high, top) ((--(top), ((low) = (top)->_lo), ((high) = (top)->_hi)))
#define _GKQSORT_STACK_NOT_EMPTY (_stack < _top)
/* The main code starts here... */
/* GK_MKQSORT(GKQSORT_TYPE, GKQSORT_BASE, GKQSORT_NELT, GKQSORT_LT)
 *
 * Expands to a brace-enclosed block that sorts an array in place.
 *   GKQSORT_TYPE  element type of the array,
 *   GKQSORT_BASE  pointer to the first element (evaluated once),
 *   GKQSORT_NELT  number of elements (evaluated once, stored as size_t),
 *   GKQSORT_LT    less-than predicate; it is invoked with two POINTERS to
 *                 elements and must be non-zero when the first is smaller.
 *
 * Structure: partitions larger than _GKQSORT_MAX_THRESH are split by a
 * median-of-three quicksort driven by an explicit stack, and a final
 * insertion-sort pass over the whole array finishes the job.
 *
 * Constraints that follow from the expansion:
 *   - it executes a bare `return;` when there is nothing to sort, so it can
 *     only be expanded inside a function returning void;
 *   - it defines the label `_jump_over`, and labels have function scope, so
 *     it can be expanded at most once per function;
 *   - it relies on the _GKQSORT_* helper macros being defined.
 */
#define GK_MKQSORT(GKQSORT_TYPE,GKQSORT_BASE,GKQSORT_NELT,GKQSORT_LT) \
{ \
GKQSORT_TYPE *const _base = (GKQSORT_BASE); \
const size_t _elems = (GKQSORT_NELT); \
GKQSORT_TYPE _hold; \
\
if (_elems < 1) \
return; \
\
/* Don't declare two variables of type GKQSORT_TYPE in a single \
* statement: eg `TYPE a, b;', in case if TYPE is a pointer, \
* expands to `type* a, b;' wich isn't what we want. \
*/ \
\
if (_elems > _GKQSORT_MAX_THRESH) { \
GKQSORT_TYPE *_lo = _base; \
GKQSORT_TYPE *_hi = _lo + _elems - 1; \
struct { \
GKQSORT_TYPE *_hi; GKQSORT_TYPE *_lo; \
} _stack[_GKQSORT_STACK_SIZE], *_top = _stack + 1; \
\
while (_GKQSORT_STACK_NOT_EMPTY) { \
GKQSORT_TYPE *_left_ptr; GKQSORT_TYPE *_right_ptr; \
\
/* Select median value from among LO, MID, and HI. Rearrange \
LO and HI so the three values are sorted. This lowers the \
probability of picking a pathological pivot value and \
skips a comparison for both the LEFT_PTR and RIGHT_PTR in \
the while loops. */ \
\
GKQSORT_TYPE *_mid = _lo + ((_hi - _lo) >> 1); \
\
if (GKQSORT_LT (_mid, _lo)) \
_GKQSORT_SWAP (_mid, _lo, _hold); \
if (GKQSORT_LT (_hi, _mid)) \
_GKQSORT_SWAP (_mid, _hi, _hold); \
else \
goto _jump_over; \
if (GKQSORT_LT (_mid, _lo)) \
_GKQSORT_SWAP (_mid, _lo, _hold); \
_jump_over:; \
\
_left_ptr = _lo + 1; \
_right_ptr = _hi - 1; \
\
/* Here's the famous ``collapse the walls'' section of quicksort. \
Gotta like those tight inner loops! They are the main reason \
that this algorithm runs much faster than others. */ \
do { \
while (GKQSORT_LT (_left_ptr, _mid)) \
++_left_ptr; \
\
while (GKQSORT_LT (_mid, _right_ptr)) \
--_right_ptr; \
\
if (_left_ptr < _right_ptr) { \
_GKQSORT_SWAP (_left_ptr, _right_ptr, _hold); \
if (_mid == _left_ptr) \
_mid = _right_ptr; \
else if (_mid == _right_ptr) \
_mid = _left_ptr; \
++_left_ptr; \
--_right_ptr; \
} \
else if (_left_ptr == _right_ptr) { \
++_left_ptr; \
--_right_ptr; \
break; \
} \
} while (_left_ptr <= _right_ptr); \
\
/* Set up pointers for next iteration. First determine whether \
left and right partitions are below the threshold size. If so, \
ignore one or both. Otherwise, push the larger partition's \
bounds on the stack and continue sorting the smaller one. */ \
\
if (_right_ptr - _lo <= _GKQSORT_MAX_THRESH) { \
if (_hi - _left_ptr <= _GKQSORT_MAX_THRESH) \
/* Ignore both small partitions. */ \
_GKQSORT_POP (_lo, _hi, _top); \
else \
/* Ignore small left partition. */ \
_lo = _left_ptr; \
} \
else if (_hi - _left_ptr <= _GKQSORT_MAX_THRESH) \
/* Ignore small right partition. */ \
_hi = _right_ptr; \
else if (_right_ptr - _lo > _hi - _left_ptr) { \
/* Push larger left partition indices. */ \
_GKQSORT_PUSH (_top, _lo, _right_ptr); \
_lo = _left_ptr; \
} \
else { \
/* Push larger right partition indices. */ \
_GKQSORT_PUSH (_top, _left_ptr, _hi); \
_hi = _right_ptr; \
} \
} \
} \
\
/* Once the BASE array is partially sorted by quicksort the rest \
is completely sorted using insertion sort, since this is efficient \
for partitions below MAX_THRESH size. BASE points to the \
beginning of the array to sort, and END_PTR points at the very \
last element in the array (*not* one beyond it!). */ \
\
{ \
GKQSORT_TYPE *const _end_ptr = _base + _elems - 1; \
GKQSORT_TYPE *_tmp_ptr = _base; \
register GKQSORT_TYPE *_run_ptr; \
GKQSORT_TYPE *_thresh; \
\
_thresh = _base + _GKQSORT_MAX_THRESH; \
if (_thresh > _end_ptr) \
_thresh = _end_ptr; \
\
/* Find smallest element in first threshold and place it at the \
array's beginning. This is the smallest array element, \
and the operation speeds up insertion sort's inner loop. */ \
\
for (_run_ptr = _tmp_ptr + 1; _run_ptr <= _thresh; ++_run_ptr) \
if (GKQSORT_LT (_run_ptr, _tmp_ptr)) \
_tmp_ptr = _run_ptr; \
\
if (_tmp_ptr != _base) \
_GKQSORT_SWAP (_tmp_ptr, _base, _hold); \
\
/* Insertion sort, running from left-hand-side \
* up to right-hand-side. */ \
\
_run_ptr = _base + 1; \
while (++_run_ptr <= _end_ptr) { \
_tmp_ptr = _run_ptr - 1; \
while (GKQSORT_LT (_run_ptr, _tmp_ptr)) \
--_tmp_ptr; \
\
++_tmp_ptr; \
if (_tmp_ptr != _run_ptr) { \
GKQSORT_TYPE *_trav = _run_ptr + 1; \
while (--_trav >= _run_ptr) { \
GKQSORT_TYPE *_hi; GKQSORT_TYPE *_lo; \
_hold = *_trav; \
\
for (_hi = _lo = _trav; --_lo >= _tmp_ptr; _hi = _lo) \
*_hi = *_lo; \
*_hi = _hold; \
} \
} \
} \
} \
\
}
#endif

@ -0,0 +1,40 @@
/*!
\file
\brief Templates for various utility routines
\date Started 5/28/07
\author George
\version\verbatim $Id: gk_mkutils.h 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
*/
#ifndef _GK_MKUTILS_H_
#define _GK_MKUTILS_H_
#define GK_MKARRAY2CSR(PRFX, TYPE)\
/*************************************************************************/\
/*! Generator of the gk_?array2csr() routine, which groups the positions\
    0..n-1 of array[] by the value stored at each position.\
    \param n     is the number of entries in array[].\
    \param range bounds the values in array[]; ptr[] entries 0..range are\
                 written, so ptr[] needs range+1 slots.\
    \param array holds the value of every position and is used as an index\
                 into ptr[].\
    \param ptr   receives the per-value offsets into ind[].\
    \param ind   receives the positions, grouped by value.\
    The counts-to-offsets conversion is delegated to the MAKECSR/SHIFTCSR\
    macros (defined elsewhere; presumably prefix-sum and shift-back).\
*/\
/**************************************************************************/\
void PRFX ## array2csr(TYPE n, TYPE range, TYPE *array, TYPE *ptr, TYPE *ind)\
{\
  TYPE k;\
\
  /* clear the range+1 counters */\
  k = 0;\
  while (k <= range) {\
    ptr[k] = 0;\
    k++;\
  }\
\
  /* count how often each value occurs */\
  for (k=0; k<n; k++)\
    ptr[array[k]] += 1;\
\
  /* Compute the ptr, ind structure */\
  MAKECSR(k, range, ptr);\
  for (k=0; k<n; k++) {\
    ind[ptr[array[k]]] = k;\
    ptr[array[k]] += 1;\
  }\
  SHIFTCSR(k, range, ptr);\
}
/*! Emits the prototype of the routine generated by GK_MKARRAY2CSR().
    The last line carries no line continuation, so the macro cannot absorb
    the line that follows it. */
#define GK_MKARRAY2CSR_PROTO(PRFX, TYPE)\
  void PRFX ## array2csr(TYPE n, TYPE range, TYPE *array, TYPE *ptr, TYPE *ind);
#endif

@ -0,0 +1,301 @@
// ISO C9x compliant inttypes.h for Microsoft Visual Studio
// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124
//
// Copyright (c) 2006 Alexander Chemeris
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. The name of the author may be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
///////////////////////////////////////////////////////////////////////////////
#ifndef _MSC_VER // [
#error "Use this header only with Microsoft Visual C++ compilers!"
#endif // _MSC_VER ]
#ifndef _MSC_INTTYPES_H_ // [
#define _MSC_INTTYPES_H_
#if _MSC_VER > 1000
#pragma once
#endif
#include "gk_ms_stdint.h"
// 7.8 Format conversion of integer types
// Result type of imaxdiv(): both parts of one intmax_t division.
typedef struct {
intmax_t quot; // quotient
intmax_t rem; // remainder
} imaxdiv_t;
// 7.8.1 Macros for format specifiers
// The fprintf macros for signed integers are:
#define PRId8 "d"
#define PRIi8 "i"
#define PRIdLEAST8 "d"
#define PRIiLEAST8 "i"
#define PRIdFAST8 "d"
#define PRIiFAST8 "i"
#define PRId16 "hd"
#define PRIi16 "hi"
#define PRIdLEAST16 "hd"
#define PRIiLEAST16 "hi"
#define PRIdFAST16 "hd"
#define PRIiFAST16 "hi"
#define PRId32 "I32d"
#define PRIi32 "I32i"
#define PRIdLEAST32 "I32d"
#define PRIiLEAST32 "I32i"
#define PRIdFAST32 "I32d"
#define PRIiFAST32 "I32i"
#define PRId64 "I64d"
#define PRIi64 "I64i"
#define PRIdLEAST64 "I64d"
#define PRIiLEAST64 "I64i"
#define PRIdFAST64 "I64d"
#define PRIiFAST64 "I64i"
#define PRIdMAX "I64d"
#define PRIiMAX "I64i"
#define PRIdPTR "Id"
#define PRIiPTR "Ii"
// The fprintf macros for unsigned integers are:
#define PRIo8 "o"
#define PRIu8 "u"
#define PRIx8 "x"
#define PRIX8 "X"
#define PRIoLEAST8 "o"
#define PRIuLEAST8 "u"
#define PRIxLEAST8 "x"
#define PRIXLEAST8 "X"
#define PRIoFAST8 "o"
#define PRIuFAST8 "u"
#define PRIxFAST8 "x"
#define PRIXFAST8 "X"
#define PRIo16 "ho"
#define PRIu16 "hu"
#define PRIx16 "hx"
#define PRIX16 "hX"
#define PRIoLEAST16 "ho"
#define PRIuLEAST16 "hu"
#define PRIxLEAST16 "hx"
#define PRIXLEAST16 "hX"
#define PRIoFAST16 "ho"
#define PRIuFAST16 "hu"
#define PRIxFAST16 "hx"
#define PRIXFAST16 "hX"
#define PRIo32 "I32o"
#define PRIu32 "I32u"
#define PRIx32 "I32x"
#define PRIX32 "I32X"
#define PRIoLEAST32 "I32o"
#define PRIuLEAST32 "I32u"
#define PRIxLEAST32 "I32x"
#define PRIXLEAST32 "I32X"
#define PRIoFAST32 "I32o"
#define PRIuFAST32 "I32u"
#define PRIxFAST32 "I32x"
#define PRIXFAST32 "I32X"
#define PRIo64 "I64o"
#define PRIu64 "I64u"
#define PRIx64 "I64x"
#define PRIX64 "I64X"
#define PRIoLEAST64 "I64o"
#define PRIuLEAST64 "I64u"
#define PRIxLEAST64 "I64x"
#define PRIXLEAST64 "I64X"
#define PRIoFAST64 "I64o"
#define PRIuFAST64 "I64u"
#define PRIxFAST64 "I64x"
#define PRIXFAST64 "I64X"
#define PRIoMAX "I64o"
#define PRIuMAX "I64u"
#define PRIxMAX "I64x"
#define PRIXMAX "I64X"
#define PRIoPTR "Io"
#define PRIuPTR "Iu"
#define PRIxPTR "Ix"
#define PRIXPTR "IX"
// The fscanf macros for signed integers are:
#define SCNd8 "d"
#define SCNi8 "i"
#define SCNdLEAST8 "d"
#define SCNiLEAST8 "i"
#define SCNdFAST8 "d"
#define SCNiFAST8 "i"
#define SCNd16 "hd"
#define SCNi16 "hi"
#define SCNdLEAST16 "hd"
#define SCNiLEAST16 "hi"
#define SCNdFAST16 "hd"
#define SCNiFAST16 "hi"
#define SCNd32 "ld"
#define SCNi32 "li"
#define SCNdLEAST32 "ld"
#define SCNiLEAST32 "li"
#define SCNdFAST32 "ld"
#define SCNiFAST32 "li"
#define SCNd64 "I64d"
#define SCNi64 "I64i"
#define SCNdLEAST64 "I64d"
#define SCNiLEAST64 "I64i"
#define SCNdFAST64 "I64d"
#define SCNiFAST64 "I64i"
#define SCNdMAX "I64d"
#define SCNiMAX "I64i"
#ifdef _WIN64 // [
# define SCNdPTR "I64d"
# define SCNiPTR "I64i"
#else // _WIN64 ][
# define SCNdPTR "ld"
# define SCNiPTR "li"
#endif // _WIN64 ]
// The fscanf macros for unsigned integers are:
#define SCNo8 "o"
#define SCNu8 "u"
#define SCNx8 "x"
#define SCNX8 "X"
#define SCNoLEAST8 "o"
#define SCNuLEAST8 "u"
#define SCNxLEAST8 "x"
#define SCNXLEAST8 "X"
#define SCNoFAST8 "o"
#define SCNuFAST8 "u"
#define SCNxFAST8 "x"
#define SCNXFAST8 "X"
#define SCNo16 "ho"
#define SCNu16 "hu"
#define SCNx16 "hx"
#define SCNX16 "hX"
#define SCNoLEAST16 "ho"
#define SCNuLEAST16 "hu"
#define SCNxLEAST16 "hx"
#define SCNXLEAST16 "hX"
#define SCNoFAST16 "ho"
#define SCNuFAST16 "hu"
#define SCNxFAST16 "hx"
#define SCNXFAST16 "hX"
#define SCNo32 "lo"
#define SCNu32 "lu"
#define SCNx32 "lx"
#define SCNX32 "lX"
#define SCNoLEAST32 "lo"
#define SCNuLEAST32 "lu"
#define SCNxLEAST32 "lx"
#define SCNXLEAST32 "lX"
#define SCNoFAST32 "lo"
#define SCNuFAST32 "lu"
#define SCNxFAST32 "lx"
#define SCNXFAST32 "lX"
#define SCNo64 "I64o"
#define SCNu64 "I64u"
#define SCNx64 "I64x"
#define SCNX64 "I64X"
#define SCNoLEAST64 "I64o"
#define SCNuLEAST64 "I64u"
#define SCNxLEAST64 "I64x"
#define SCNXLEAST64 "I64X"
#define SCNoFAST64 "I64o"
#define SCNuFAST64 "I64u"
#define SCNxFAST64 "I64x"
#define SCNXFAST64 "I64X"
#define SCNoMAX "I64o"
#define SCNuMAX "I64u"
#define SCNxMAX "I64x"
#define SCNXMAX "I64X"
#ifdef _WIN64 // [
# define SCNoPTR "I64o"
# define SCNuPTR "I64u"
# define SCNxPTR "I64x"
# define SCNXPTR "I64X"
#else // _WIN64 ][
# define SCNoPTR "lo"
# define SCNuPTR "lu"
# define SCNxPTR "lx"
# define SCNXPTR "lX"
#endif // _WIN64 ]
// 7.8.2 Functions for greatest-width integer types
// 7.8.2.1 The imaxabs function
#define imaxabs _abs64
// 7.8.2.2 The imaxdiv function
// This is modified version of div() function from Microsoft's div.c found
// in %MSVC.NET%\crt\src\div.c
// Divides numer by denom and returns quotient and remainder together.
// The function has internal linkage (`static`) when STATIC_IMAXDIV is
// defined and is `_inline` otherwise. denom == 0 is not checked.
#ifdef STATIC_IMAXDIV // [
static
#else // STATIC_IMAXDIV ][
_inline
#endif // STATIC_IMAXDIV ]
imaxdiv_t __cdecl imaxdiv(intmax_t numer, intmax_t denom)
{
  intmax_t q = numer / denom;
  intmax_t r = numer % denom;
  imaxdiv_t out;

  // A negative numerator must not leave a positive remainder; if the
  // division produced one, shift one unit of denom from the remainder
  // into the quotient.
  if (numer < 0 && r > 0) {
    q += 1;
    r -= denom;
  }

  out.quot = q;
  out.rem = r;
  return out;
}
// 7.8.2.3 The strtoimax and strtoumax functions
#define strtoimax _strtoi64
#define strtoumax _strtoui64
// 7.8.2.4 The wcstoimax and wcstoumax functions
#define wcstoimax _wcstoi64
#define wcstoumax _wcstoui64
#endif // _MSC_INTTYPES_H_ ]

@ -0,0 +1,22 @@
#ifndef _MSC_VER // [
#error "Use this header only with Microsoft Visual C++ compilers!"
#endif // _MSC_VER ]
#ifndef _MS_STAT_H_
#define _MS_STAT_H_
#if _MSC_VER > 1000
#pragma once
#endif
#include <sys/stat.h>
/* Test macros for file types. */
#define __S_ISTYPE(mode, mask) (((mode) & S_IFMT) == (mask))
#define S_ISDIR(mode) __S_ISTYPE((mode), S_IFDIR)
#define S_ISCHR(mode) __S_ISTYPE((mode), S_IFCHR)
#define S_ISBLK(mode) __S_ISTYPE((mode), S_IFBLK)
#define S_ISREG(mode) __S_ISTYPE((mode), S_IFREG)
#endif

@ -0,0 +1,222 @@
// ISO C9x compliant stdint.h for Microsoft Visual Studio
// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124
//
// Copyright (c) 2006 Alexander Chemeris
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. The name of the author may be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
///////////////////////////////////////////////////////////////////////////////
#ifndef _MSC_VER // [
#error "Use this header only with Microsoft Visual C++ compilers!"
#endif // _MSC_VER ]
#ifndef _MSC_STDINT_H_ // [
#define _MSC_STDINT_H_
#if _MSC_VER > 1000
#pragma once
#endif
#include <limits.h>
// For Visual Studio 6 in C++ mode wrap <wchar.h> include with 'extern "C++" {}'
// or compiler give many errors like this:
// error C2733: second C linkage of overloaded function 'wmemchr' not allowed
#if (_MSC_VER < 1300) && defined(__cplusplus)
extern "C++" {
#endif
# include <wchar.h>
#if (_MSC_VER < 1300) && defined(__cplusplus)
}
#endif
// 7.18.1 Integer types
// 7.18.1.1 Exact-width integer types
typedef __int8 int8_t;
typedef __int16 int16_t;
typedef __int32 int32_t;
typedef __int64 int64_t;
typedef unsigned __int8 uint8_t;
typedef unsigned __int16 uint16_t;
typedef unsigned __int32 uint32_t;
typedef unsigned __int64 uint64_t;
// 7.18.1.2 Minimum-width integer types
typedef int8_t int_least8_t;
typedef int16_t int_least16_t;
typedef int32_t int_least32_t;
typedef int64_t int_least64_t;
typedef uint8_t uint_least8_t;
typedef uint16_t uint_least16_t;
typedef uint32_t uint_least32_t;
typedef uint64_t uint_least64_t;
// 7.18.1.3 Fastest minimum-width integer types
typedef int8_t int_fast8_t;
typedef int16_t int_fast16_t;
typedef int32_t int_fast32_t;
typedef int64_t int_fast64_t;
typedef uint8_t uint_fast8_t;
typedef uint16_t uint_fast16_t;
typedef uint32_t uint_fast32_t;
typedef uint64_t uint_fast64_t;
// 7.18.1.4 Integer types capable of holding object pointers
#ifdef _WIN64 // [
typedef __int64 intptr_t;
typedef unsigned __int64 uintptr_t;
#else // _WIN64 ][
typedef int intptr_t;
typedef unsigned int uintptr_t;
#endif // _WIN64 ]
// 7.18.1.5 Greatest-width integer types
typedef int64_t intmax_t;
typedef uint64_t uintmax_t;
// 7.18.2 Limits of specified-width integer types
#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [ See footnote 220 at page 257 and footnote 221 at page 259
// 7.18.2.1 Limits of exact-width integer types
#define INT8_MIN ((int8_t)_I8_MIN)
#define INT8_MAX _I8_MAX
#define INT16_MIN ((int16_t)_I16_MIN)
#define INT16_MAX _I16_MAX
#define INT32_MIN ((int32_t)_I32_MIN)
#define INT32_MAX _I32_MAX
#define INT64_MIN ((int64_t)_I64_MIN)
#define INT64_MAX _I64_MAX
#define UINT8_MAX _UI8_MAX
#define UINT16_MAX _UI16_MAX
#define UINT32_MAX _UI32_MAX
#define UINT64_MAX _UI64_MAX
// 7.18.2.2 Limits of minimum-width integer types
#define INT_LEAST8_MIN INT8_MIN
#define INT_LEAST8_MAX INT8_MAX
#define INT_LEAST16_MIN INT16_MIN
#define INT_LEAST16_MAX INT16_MAX
#define INT_LEAST32_MIN INT32_MIN
#define INT_LEAST32_MAX INT32_MAX
#define INT_LEAST64_MIN INT64_MIN
#define INT_LEAST64_MAX INT64_MAX
#define UINT_LEAST8_MAX UINT8_MAX
#define UINT_LEAST16_MAX UINT16_MAX
#define UINT_LEAST32_MAX UINT32_MAX
#define UINT_LEAST64_MAX UINT64_MAX
// 7.18.2.3 Limits of fastest minimum-width integer types
#define INT_FAST8_MIN INT8_MIN
#define INT_FAST8_MAX INT8_MAX
#define INT_FAST16_MIN INT16_MIN
#define INT_FAST16_MAX INT16_MAX
#define INT_FAST32_MIN INT32_MIN
#define INT_FAST32_MAX INT32_MAX
#define INT_FAST64_MIN INT64_MIN
#define INT_FAST64_MAX INT64_MAX
#define UINT_FAST8_MAX UINT8_MAX
#define UINT_FAST16_MAX UINT16_MAX
#define UINT_FAST32_MAX UINT32_MAX
#define UINT_FAST64_MAX UINT64_MAX
// 7.18.2.4 Limits of integer types capable of holding object pointers
#ifdef _WIN64 // [
# define INTPTR_MIN INT64_MIN
# define INTPTR_MAX INT64_MAX
# define UINTPTR_MAX UINT64_MAX
#else // _WIN64 ][
# define INTPTR_MIN INT32_MIN
# define INTPTR_MAX INT32_MAX
# define UINTPTR_MAX UINT32_MAX
#endif // _WIN64 ]
// 7.18.2.5 Limits of greatest-width integer types
#define INTMAX_MIN INT64_MIN
#define INTMAX_MAX INT64_MAX
#define UINTMAX_MAX UINT64_MAX
// 7.18.3 Limits of other integer types
#ifdef _WIN64 // [
# define PTRDIFF_MIN _I64_MIN
# define PTRDIFF_MAX _I64_MAX
#else // _WIN64 ][
# define PTRDIFF_MIN _I32_MIN
# define PTRDIFF_MAX _I32_MAX
#endif // _WIN64 ]
#define SIG_ATOMIC_MIN INT_MIN
#define SIG_ATOMIC_MAX INT_MAX
#ifndef SIZE_MAX // [
# ifdef _WIN64 // [
# define SIZE_MAX _UI64_MAX
# else // _WIN64 ][
# define SIZE_MAX _UI32_MAX
# endif // _WIN64 ]
#endif // SIZE_MAX ]
// WCHAR_MIN and WCHAR_MAX are also defined in <wchar.h>
#ifndef WCHAR_MIN // [
# define WCHAR_MIN 0
#endif // WCHAR_MIN ]
#ifndef WCHAR_MAX // [
# define WCHAR_MAX _UI16_MAX
#endif // WCHAR_MAX ]
#define WINT_MIN 0
#define WINT_MAX _UI16_MAX
#endif // __STDC_LIMIT_MACROS ]
// 7.18.4 Macros for integer constants
#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [ See footnote 224 at page 260
// The constant macros below are always visible to C code; C++ code only sees
// them when __STDC_CONSTANT_MACROS is defined before this header is included.
// 7.18.4.1 Macros for minimum-width integer constants
// Each macro token-pastes a sized-integer suffix (i8 ... ui64) onto its
// argument, so the argument has to be a bare literal and not an expression.
// NOTE(review): these suffixes are presumably the Microsoft compiler's
// sized-literal extension (this header targets MSVC only) -- confirm.
#define INT8_C(val) val##i8
#define INT16_C(val) val##i16
#define INT32_C(val) val##i32
#define INT64_C(val) val##i64
#define UINT8_C(val) val##ui8
#define UINT16_C(val) val##ui16
#define UINT32_C(val) val##ui32
#define UINT64_C(val) val##ui64
// 7.18.4.2 Macros for greatest-width integer constants
// The greatest-width constants simply reuse the 64-bit forms.
#define INTMAX_C INT64_C
#define UINTMAX_C UINT64_C
#endif // __STDC_CONSTANT_MACROS ]
#endif // _MSC_STDINT_H_ ]

@ -0,0 +1,426 @@
/*!
\file gk_proto.h
\brief This file contains function prototypes
\date Started 3/27/2007
\author George
\version\verbatim $Id: gk_proto.h 22010 2018-05-14 20:20:26Z karypis $ \endverbatim
*/
#ifndef _GK_PROTO_H_
#define _GK_PROTO_H_
#ifdef __cplusplus
extern "C" {
#endif
/*-------------------------------------------------------------
* blas.c
*-------------------------------------------------------------*/
GK_MKBLAS_PROTO(gk_c, char, int)
GK_MKBLAS_PROTO(gk_i, int, int)
GK_MKBLAS_PROTO(gk_i8, int8_t, int8_t)
GK_MKBLAS_PROTO(gk_i16, int16_t, int16_t)
GK_MKBLAS_PROTO(gk_i32, int32_t, int32_t)
GK_MKBLAS_PROTO(gk_i64, int64_t, int64_t)
GK_MKBLAS_PROTO(gk_z, ssize_t, ssize_t)
GK_MKBLAS_PROTO(gk_zu, size_t, size_t)
GK_MKBLAS_PROTO(gk_f, float, float)
GK_MKBLAS_PROTO(gk_d, double, double)
GK_MKBLAS_PROTO(gk_idx, gk_idx_t, gk_idx_t)
/*-------------------------------------------------------------
* io.c
*-------------------------------------------------------------*/
FILE *gk_fopen(char *, char *, const char *);
void gk_fclose(FILE *);
ssize_t gk_read(int fd, void *vbuf, size_t count);
ssize_t gk_write(int fd, void *vbuf, size_t count);
ssize_t gk_getline(char **lineptr, size_t *n, FILE *stream);
char **gk_readfile(char *fname, size_t *r_nlines);
int32_t *gk_i32readfile(char *fname, size_t *r_nlines);
int64_t *gk_i64readfile(char *fname, size_t *r_nlines);
ssize_t *gk_zreadfile(char *fname, size_t *r_nlines);
char *gk_creadfilebin(char *fname, size_t *r_nelmnts);
size_t gk_cwritefilebin(char *fname, size_t n, char *a);
int32_t *gk_i32readfilebin(char *fname, size_t *r_nelmnts);
size_t gk_i32writefilebin(char *fname, size_t n, int32_t *a);
int64_t *gk_i64readfilebin(char *fname, size_t *r_nelmnts);
size_t gk_i64writefilebin(char *fname, size_t n, int64_t *a);
ssize_t *gk_zreadfilebin(char *fname, size_t *r_nelmnts);
size_t gk_zwritefilebin(char *fname, size_t n, ssize_t *a);
float *gk_freadfilebin(char *fname, size_t *r_nelmnts);
size_t gk_fwritefilebin(char *fname, size_t n, float *a);
double *gk_dreadfilebin(char *fname, size_t *r_nelmnts);
size_t gk_dwritefilebin(char *fname, size_t n, double *a);
/*-------------------------------------------------------------
* fs.c
*-------------------------------------------------------------*/
int gk_fexists(char *);
int gk_dexists(char *);
ssize_t gk_getfsize(char *);
void gk_getfilestats(char *fname, size_t *r_nlines, size_t *r_ntokens,
size_t *r_max_nlntokens, size_t *r_nbytes);
char *gk_getbasename(char *path);
char *gk_getextname(char *path);
char *gk_getfilename(char *path);
char *gk_getpathname(char *path);
int gk_mkpath(char *);
int gk_rmpath(char *);
/*-------------------------------------------------------------
* memory.c
*-------------------------------------------------------------*/
GK_MKALLOC_PROTO(gk_c, char)
GK_MKALLOC_PROTO(gk_i, int)
GK_MKALLOC_PROTO(gk_i8, int8_t)
GK_MKALLOC_PROTO(gk_i16, int16_t)
GK_MKALLOC_PROTO(gk_i32, int32_t)
GK_MKALLOC_PROTO(gk_i64, int64_t)
GK_MKALLOC_PROTO(gk_ui8, uint8_t)
GK_MKALLOC_PROTO(gk_ui16, uint16_t)
GK_MKALLOC_PROTO(gk_ui32, uint32_t)
GK_MKALLOC_PROTO(gk_ui64, uint64_t)
GK_MKALLOC_PROTO(gk_z, ssize_t)
GK_MKALLOC_PROTO(gk_zu, size_t)
GK_MKALLOC_PROTO(gk_f, float)
GK_MKALLOC_PROTO(gk_d, double)
GK_MKALLOC_PROTO(gk_idx, gk_idx_t)
GK_MKALLOC_PROTO(gk_ckv, gk_ckv_t)
GK_MKALLOC_PROTO(gk_ikv, gk_ikv_t)
GK_MKALLOC_PROTO(gk_i8kv, gk_i8kv_t)
GK_MKALLOC_PROTO(gk_i16kv, gk_i16kv_t)
GK_MKALLOC_PROTO(gk_i32kv, gk_i32kv_t)
GK_MKALLOC_PROTO(gk_i64kv, gk_i64kv_t)
GK_MKALLOC_PROTO(gk_zkv, gk_zkv_t)
GK_MKALLOC_PROTO(gk_zukv, gk_zukv_t)
GK_MKALLOC_PROTO(gk_fkv, gk_fkv_t)
GK_MKALLOC_PROTO(gk_dkv, gk_dkv_t)
GK_MKALLOC_PROTO(gk_skv, gk_skv_t)
GK_MKALLOC_PROTO(gk_idxkv, gk_idxkv_t)
void gk_AllocMatrix(void ***, size_t, size_t , size_t);
void gk_FreeMatrix(void ***, size_t, size_t);
int gk_malloc_init();
void gk_malloc_cleanup(int showstats);
void *gk_malloc(size_t nbytes, char *msg);
void *gk_realloc(void *oldptr, size_t nbytes, char *msg);
void gk_free(void **ptr1,...);
size_t gk_GetCurMemoryUsed();
size_t gk_GetMaxMemoryUsed();
void gk_GetVMInfo(size_t *vmsize, size_t *vmrss);
size_t gk_GetProcVmPeak();
/*-------------------------------------------------------------
* seq.c
*-------------------------------------------------------------*/
gk_seq_t *gk_seq_ReadGKMODPSSM(char *file_name);
gk_i2cc2i_t *gk_i2cc2i_create_common(char *alphabet);
void gk_seq_init(gk_seq_t *seq);
/*-------------------------------------------------------------
* error.c
*-------------------------------------------------------------*/
void gk_set_exit_on_error(int value);
void errexit(char *,...);
void gk_errexit(int signum, char *,...);
int gk_sigtrap();
int gk_siguntrap();
void gk_sigthrow(int signum);
void gk_SetSignalHandlers();
void gk_UnsetSignalHandlers();
void gk_NonLocalExit_Handler(int signum);
char *gk_strerror(int errnum);
void PrintBackTrace();
/*-------------------------------------------------------------
* gk_util.c
*-------------------------------------------------------------*/
void gk_RandomPermute(size_t, int *, int);
void gk_array2csr(size_t n, size_t range, int *array, int *ptr, int *ind);
int gk_log2(int);
int gk_ispow2(int);
float gk_flog2(float);
/*-------------------------------------------------------------
* timers.c
*-------------------------------------------------------------*/
gk_wclock_t gk_WClockSeconds(void);
double gk_CPUSeconds(void);
/*-------------------------------------------------------------
* string.c
*-------------------------------------------------------------*/
char *gk_strchr_replace(char *str, char *fromlist, char *tolist);
int gk_strstr_replace(char *str, char *pattern, char *replacement, char *options, char **new_str);
char *gk_strtprune(char *, char *);
char *gk_strhprune(char *, char *);
char *gk_strtoupper(char *);
char *gk_strtolower(char *);
char *gk_strdup(char *orgstr);
int gk_strcasecmp(char *s1, char *s2);
int gk_strrcmp(char *s1, char *s2);
char *gk_time2str(time_t time);
time_t gk_str2time(char *str);
int gk_GetStringID(gk_StringMap_t *strmap, char *key);
/*-------------------------------------------------------------
* sort.c
*-------------------------------------------------------------*/
void gk_csorti(size_t, char *);
void gk_csortd(size_t, char *);
void gk_isorti(size_t, int *);
void gk_isortd(size_t, int *);
void gk_i32sorti(size_t, int32_t *);
void gk_i32sortd(size_t, int32_t *);
void gk_i64sorti(size_t, int64_t *);
void gk_i64sortd(size_t, int64_t *);
void gk_ui32sorti(size_t, uint32_t *);
void gk_ui32sortd(size_t, uint32_t *);
void gk_ui64sorti(size_t, uint64_t *);
void gk_ui64sortd(size_t, uint64_t *);
void gk_fsorti(size_t, float *);
void gk_fsortd(size_t, float *);
void gk_dsorti(size_t, double *);
void gk_dsortd(size_t, double *);
void gk_idxsorti(size_t, gk_idx_t *);
void gk_idxsortd(size_t, gk_idx_t *);
void gk_ckvsorti(size_t, gk_ckv_t *);
void gk_ckvsortd(size_t, gk_ckv_t *);
void gk_ikvsorti(size_t, gk_ikv_t *);
void gk_ikvsortd(size_t, gk_ikv_t *);
void gk_i32kvsorti(size_t, gk_i32kv_t *);
void gk_i32kvsortd(size_t, gk_i32kv_t *);
void gk_i64kvsorti(size_t, gk_i64kv_t *);
void gk_i64kvsortd(size_t, gk_i64kv_t *);
void gk_zkvsorti(size_t, gk_zkv_t *);
void gk_zkvsortd(size_t, gk_zkv_t *);
void gk_zukvsorti(size_t, gk_zukv_t *);
void gk_zukvsortd(size_t, gk_zukv_t *);
void gk_fkvsorti(size_t, gk_fkv_t *);
void gk_fkvsortd(size_t, gk_fkv_t *);
void gk_dkvsorti(size_t, gk_dkv_t *);
void gk_dkvsortd(size_t, gk_dkv_t *);
void gk_skvsorti(size_t, gk_skv_t *);
void gk_skvsortd(size_t, gk_skv_t *);
void gk_idxkvsorti(size_t, gk_idxkv_t *);
void gk_idxkvsortd(size_t, gk_idxkv_t *);
/*-------------------------------------------------------------
* Selection routines
*-------------------------------------------------------------*/
int gk_dfkvkselect(size_t, int, gk_fkv_t *);
int gk_ifkvkselect(size_t, int, gk_fkv_t *);
/*-------------------------------------------------------------
* Priority queue
*-------------------------------------------------------------*/
GK_MKPQUEUE_PROTO(gk_ipq, gk_ipq_t, int, gk_idx_t)
GK_MKPQUEUE_PROTO(gk_i32pq, gk_i32pq_t, int32_t, gk_idx_t)
GK_MKPQUEUE_PROTO(gk_i64pq, gk_i64pq_t, int64_t, gk_idx_t)
GK_MKPQUEUE_PROTO(gk_fpq, gk_fpq_t, float, gk_idx_t)
GK_MKPQUEUE_PROTO(gk_dpq, gk_dpq_t, double, gk_idx_t)
GK_MKPQUEUE_PROTO(gk_idxpq, gk_idxpq_t, gk_idx_t, gk_idx_t)
/*-------------------------------------------------------------
* HTable routines
*-------------------------------------------------------------*/
gk_HTable_t *HTable_Create(int nelements);
void HTable_Reset(gk_HTable_t *htable);
void HTable_Resize(gk_HTable_t *htable, int nelements);
void HTable_Insert(gk_HTable_t *htable, int key, int val);
void HTable_Delete(gk_HTable_t *htable, int key);
int HTable_Search(gk_HTable_t *htable, int key);
int HTable_GetNext(gk_HTable_t *htable, int key, int *val, int type);
int HTable_SearchAndDelete(gk_HTable_t *htable, int key);
void HTable_Destroy(gk_HTable_t *htable);
int HTable_HFunction(int nelements, int key);
/*-------------------------------------------------------------
* Tokenizer routines
*-------------------------------------------------------------*/
void gk_strtokenize(char *line, char *delim, gk_Tokens_t *tokens);
void gk_freetokenslist(gk_Tokens_t *tokens);
/*-------------------------------------------------------------
* Encoder/Decoder
*-------------------------------------------------------------*/
void encodeblock(unsigned char *in, unsigned char *out);
void decodeblock(unsigned char *in, unsigned char *out);
void GKEncodeBase64(int nbytes, unsigned char *inbuffer, unsigned char *outbuffer);
void GKDecodeBase64(int nbytes, unsigned char *inbuffer, unsigned char *outbuffer);
/*-------------------------------------------------------------
* random.c
*-------------------------------------------------------------*/
GK_MKRANDOM_PROTO(gk_c, size_t, char)
GK_MKRANDOM_PROTO(gk_i, size_t, int)
GK_MKRANDOM_PROTO(gk_i32, size_t, int32_t)
GK_MKRANDOM_PROTO(gk_f, size_t, float)
GK_MKRANDOM_PROTO(gk_d, size_t, double)
GK_MKRANDOM_PROTO(gk_idx, size_t, gk_idx_t)
GK_MKRANDOM_PROTO(gk_z, size_t, ssize_t)
GK_MKRANDOM_PROTO(gk_zu, size_t, size_t)
void gk_randinit(uint64_t);
uint64_t gk_randint64(void);
uint32_t gk_randint32(void);
/*-------------------------------------------------------------
* OpenMP fake functions
*
* These omp_* prototypes are only declared when __OPENMP__ is not defined,
* so that code calling them still has declarations to compile against.
* Only the declarations live here; the definitions are outside this header.
* NOTE(review): __OPENMP__ is not the compiler-provided _OPENMP macro; it is
* presumably set by the build system when OpenMP is enabled -- confirm.
*-------------------------------------------------------------*/
#if !defined(__OPENMP__)
void omp_set_num_threads(int num_threads);
int omp_get_num_threads(void);
int omp_get_max_threads(void);
int omp_get_thread_num(void);
int omp_get_num_procs(void);
int omp_in_parallel(void);
void omp_set_dynamic(int num_threads);
int omp_get_dynamic(void);
void omp_set_nested(int nested);
int omp_get_nested(void);
#endif /* __OPENMP__ */
/*-------------------------------------------------------------
* CSR-related functions
*-------------------------------------------------------------*/
gk_csr_t *gk_csr_Create();
void gk_csr_Init(gk_csr_t *mat);
void gk_csr_Free(gk_csr_t **mat);
void gk_csr_FreeContents(gk_csr_t *mat);
gk_csr_t *gk_csr_Dup(gk_csr_t *mat);
gk_csr_t *gk_csr_ExtractSubmatrix(gk_csr_t *mat, int rstart, int nrows);
gk_csr_t *gk_csr_ExtractRows(gk_csr_t *mat, int nrows, int *rind);
gk_csr_t *gk_csr_ExtractPartition(gk_csr_t *mat, int *part, int pid);
gk_csr_t **gk_csr_Split(gk_csr_t *mat, int *color);
int gk_csr_DetermineFormat(char *filename, int format);
gk_csr_t *gk_csr_Read(char *filename, int format, int readvals, int numbering);
void gk_csr_Write(gk_csr_t *mat, char *filename, int format, int writevals, int numbering);
gk_csr_t *gk_csr_Prune(gk_csr_t *mat, int what, int minf, int maxf);
gk_csr_t *gk_csr_LowFilter(gk_csr_t *mat, int what, int norm, float fraction);
gk_csr_t *gk_csr_TopKPlusFilter(gk_csr_t *mat, int what, int topk, float keepval);
gk_csr_t *gk_csr_ZScoreFilter(gk_csr_t *mat, int what, float zscore);
void gk_csr_CompactColumns(gk_csr_t *mat);
void gk_csr_SortIndices(gk_csr_t *mat, int what);
void gk_csr_CreateIndex(gk_csr_t *mat, int what);
void gk_csr_Normalize(gk_csr_t *mat, int what, int norm);
void gk_csr_Scale(gk_csr_t *mat, int type);
void gk_csr_ComputeSums(gk_csr_t *mat, int what);
void gk_csr_ComputeNorms(gk_csr_t *mat, int what);
void gk_csr_ComputeSquaredNorms(gk_csr_t *mat, int what);
gk_csr_t *gk_csr_Shuffle(gk_csr_t *mat, int what, int summetric);
gk_csr_t *gk_csr_Transpose(gk_csr_t *mat);
float gk_csr_ComputeSimilarity(gk_csr_t *mat, int i1, int i2, int what, int simtype);
float gk_csr_ComputePairSimilarity(gk_csr_t *mat_a, gk_csr_t *mat_b, int i1, int i2, int what, int simtype);
int gk_csr_GetSimilarRows(gk_csr_t *mat, int nqterms, int *qind, float *qval,
int simtype, int nsim, float minsim, gk_fkv_t *hits, int *_imarker,
gk_fkv_t *i_cand);
int gk_csr_FindConnectedComponents(gk_csr_t *mat, int32_t *cptr, int32_t *cind,
int32_t *cids);
gk_csr_t *gk_csr_MakeSymmetric(gk_csr_t *mat, int op);
gk_csr_t *gk_csr_ReorderSymmetric(gk_csr_t *mat, int32_t *perm, int32_t *iperm);
void gk_csr_ComputeBFSOrderingSymmetric(gk_csr_t *mat, int maxdegree, int v,
int32_t **r_perm, int32_t **r_iperm);
void gk_csr_ComputeBestFOrderingSymmetric(gk_csr_t *mat, int v, int type,
int32_t **r_perm, int32_t **r_iperm);
/* itemsets.c */
void gk_find_frequent_itemsets(int ntrans, ssize_t *tranptr, int *tranind,
int minfreq, int maxfreq, int minlen, int maxlen,
void (*process_itemset)(void *stateptr, int nitems, int *itemind,
int ntrans, int *tranind),
void *stateptr);
/* evaluate.c */
float ComputeAccuracy(int n, gk_fkv_t *list);
float ComputeROCn(int n, int maxN, gk_fkv_t *list);
float ComputeMedianRFP(int n, gk_fkv_t *list);
float ComputeMean (int n, float *values);
float ComputeStdDev(int n, float *values);
/* mcore.c */
gk_mcore_t *gk_mcoreCreate(size_t coresize);
gk_mcore_t *gk_gkmcoreCreate();
void gk_mcoreDestroy(gk_mcore_t **r_mcore, int showstats);
void gk_gkmcoreDestroy(gk_mcore_t **r_mcore, int showstats);
void *gk_mcoreMalloc(gk_mcore_t *mcore, size_t nbytes);
void gk_mcorePush(gk_mcore_t *mcore);
void gk_gkmcorePush(gk_mcore_t *mcore);
void gk_mcorePop(gk_mcore_t *mcore);
void gk_gkmcorePop(gk_mcore_t *mcore);
void gk_mcoreAdd(gk_mcore_t *mcore, int type, size_t nbytes, void *ptr);
void gk_gkmcoreAdd(gk_mcore_t *mcore, int type, size_t nbytes, void *ptr);
void gk_mcoreDel(gk_mcore_t *mcore, void *ptr);
void gk_gkmcoreDel(gk_mcore_t *mcore, void *ptr);
/* rw.c */
int gk_rw_PageRank(gk_csr_t *mat, float lamda, float eps, int max_niter, float *pr);
/* graph.c */
gk_graph_t *gk_graph_Create();
void gk_graph_Init(gk_graph_t *graph);
void gk_graph_Free(gk_graph_t **graph);
void gk_graph_FreeContents(gk_graph_t *graph);
gk_graph_t *gk_graph_Read(char *filename, int format, int hasvals,
int numbering, int isfewgts, int isfvwgts, int isfvsizes);
void gk_graph_Write(gk_graph_t *graph, char *filename, int format, int numbering);
gk_graph_t *gk_graph_Dup(gk_graph_t *graph);
gk_graph_t *gk_graph_Transpose(gk_graph_t *graph);
gk_graph_t *gk_graph_ExtractSubgraph(gk_graph_t *graph, int vstart, int nvtxs);
gk_graph_t *gk_graph_Reorder(gk_graph_t *graph, int32_t *perm, int32_t *iperm);
int gk_graph_FindComponents(gk_graph_t *graph, int32_t *cptr, int32_t *cind);
void gk_graph_ComputeBFSOrdering(gk_graph_t *graph, int v, int32_t **r_perm,
int32_t **r_iperm);
void gk_graph_ComputeBestFOrdering0(gk_graph_t *graph, int v, int type,
int32_t **r_perm, int32_t **r_iperm);
void gk_graph_ComputeBestFOrdering(gk_graph_t *graph, int v, int type,
int32_t **r_perm, int32_t **r_iperm);
void gk_graph_SingleSourceShortestPaths(gk_graph_t *graph, int v, void **r_sps);
void gk_graph_SortAdjacencies(gk_graph_t *graph);
gk_graph_t *gk_graph_MakeSymmetric(gk_graph_t *graph, int op);
/* cache.c */
gk_cache_t *gk_cacheCreate(uint32_t nway, uint32_t lnbits, size_t cnbits);
void gk_cacheReset(gk_cache_t *cache);
void gk_cacheDestroy(gk_cache_t **r_cache);
int gk_cacheLoad(gk_cache_t *cache, size_t addr);
double gk_cacheGetHitRate(gk_cache_t *cache);
#ifdef __cplusplus
}
#endif
#endif

@ -0,0 +1,296 @@
/*!
\file gk_struct.h
\brief This file contains various datastructures used/provided by GKlib
\date Started 3/27/2007
\author George
\version\verbatim $Id: gk_struct.h 21988 2018-04-16 00:11:19Z karypis $ \endverbatim
*/
#ifndef _GK_STRUCT_H_
#define _GK_STRUCT_H_
/********************************************************************/
/*! Generator for gk_??KeyVal_t data structure

    Expands to a typedef called NAME for an anonymous struct that holds
    a `key` member of type KEYTYPE and a `val` member of type VALTYPE.
    The expansion already ends in ';', so an instantiation is written
    without a trailing semicolon.

    The last line of the macro intentionally carries no line
    continuation: a trailing backslash would splice whatever source
    line happens to follow the macro into its replacement list. */
/********************************************************************/
#define GK_MKKEYVALUE_T(NAME, KEYTYPE, VALTYPE) \
typedef struct {\
  KEYTYPE key;\
  VALTYPE val;\
} NAME;

/* The actual KeyVal data structures. Every value is an ssize_t, except for
   gk_idxkv_t whose key and value are both gk_idx_t. */
GK_MKKEYVALUE_T(gk_ckv_t, char, ssize_t)
GK_MKKEYVALUE_T(gk_ikv_t, int, ssize_t)
GK_MKKEYVALUE_T(gk_i8kv_t, int8_t, ssize_t)
GK_MKKEYVALUE_T(gk_i16kv_t, int16_t, ssize_t)
GK_MKKEYVALUE_T(gk_i32kv_t, int32_t, ssize_t)
GK_MKKEYVALUE_T(gk_i64kv_t, int64_t, ssize_t)
GK_MKKEYVALUE_T(gk_zkv_t, ssize_t, ssize_t)
GK_MKKEYVALUE_T(gk_zukv_t, size_t, ssize_t)
GK_MKKEYVALUE_T(gk_fkv_t, float, ssize_t)
GK_MKKEYVALUE_T(gk_dkv_t, double, ssize_t)
GK_MKKEYVALUE_T(gk_skv_t, char *, ssize_t)
GK_MKKEYVALUE_T(gk_idxkv_t, gk_idx_t, gk_idx_t)
/********************************************************************/
/*! Generator for gk_?pq_t data structure */
/********************************************************************/
#define GK_MKPQUEUE_T(NAME, KVTYPE)\
typedef struct {\
size_t nnodes;\
size_t maxnodes;\
\
/* Heap version of the data structure */ \
KVTYPE *heap;\
ssize_t *locator;\
} NAME;\
GK_MKPQUEUE_T(gk_ipq_t, gk_ikv_t)
GK_MKPQUEUE_T(gk_i32pq_t, gk_i32kv_t)
GK_MKPQUEUE_T(gk_i64pq_t, gk_i64kv_t)
GK_MKPQUEUE_T(gk_fpq_t, gk_fkv_t)
GK_MKPQUEUE_T(gk_dpq_t, gk_dkv_t)
GK_MKPQUEUE_T(gk_idxpq_t, gk_idxkv_t)
/* Generator for a priority-queue type that keeps keys and values in two
   separate arrays: expands to a typedef called NAME for an anonymous struct
   with two ssize_t counters (nnodes, maxnodes), a `keys` array of KTYPE and
   a `vals` array of VTYPE. The expansion already ends in ';'.
   The last line intentionally carries no line continuation, so the macro
   body cannot absorb the source line that follows it. */
#define GK_MKPQUEUE2_T(NAME, KTYPE, VTYPE)\
typedef struct {\
  ssize_t nnodes;\
  ssize_t maxnodes;\
\
  /* Heap version of the data structure */ \
  KTYPE *keys;\
  VTYPE *vals;\
} NAME;
/*-------------------------------------------------------------
* The following data structure stores a sparse CSR format
*
* Apart from the two dimensions, every member is a pointer, and each one is
* declared as a row-named / column-named pair (r... and c...).
* NOTE(review): the per-member notes below are inferred from the member
* names and types only -- confirm them against csr.c.
*-------------------------------------------------------------*/
typedef struct gk_csr_t {
int32_t nrows, ncols; /* presumably the number of rows and columns */
ssize_t *rowptr, *colptr; /* presumably per-row/per-column offsets into the ind/val arrays */
int32_t *rowind, *colind; /* presumably the indices of the stored entries */
int32_t *rowids, *colids;
int32_t *rlabels, *clabels;
int32_t *rmap, *cmap;
float *rowval, *colval; /* presumably the values of the stored entries */
float *rnorms, *cnorms;
float *rsums, *csums;
float *rsizes, *csizes;
float *rvols, *cvols;
float *rwgts, *cwgts;
} gk_csr_t;
/*-------------------------------------------------------------
* The following data structure stores a sparse graph
*-------------------------------------------------------------*/
typedef struct gk_graph_t {
int32_t nvtxs; /*!< The number of vertices in the graph */
ssize_t *xadj; /*!< The ptr-structure of the adjncy list */
int32_t *adjncy; /*!< The adjacency list of the graph */
int32_t *iadjwgt; /*!< The integer edge weights */
float *fadjwgt; /*!< The floating point edge weights */
int32_t *ivwgts; /*!< The integer vertex weights */
float *fvwgts; /*!< The floating point vertex weights */
int32_t *ivsizes; /*!< The integer vertex sizes */
float *fvsizes; /*!< The floating point vertex sizes */
int32_t *vlabels; /*!< The labels of the vertices */
} gk_graph_t;
/*-------------------------------------------------------------
* The following data structure stores a string as a
* pair of a length and the buffer itself.
*-------------------------------------------------------------*/
typedef struct gk_str_t {
size_t len;
char *buf;
} gk_str_t;
/*-------------------------------------------------------------
* The following data structure implements a string-2-int mapping
* table used for parsing command-line options
*-------------------------------------------------------------*/
typedef struct gk_StringMap_t {
char *name;
int id;
} gk_StringMap_t;
/*------------------------------------------------------------
* This structure implements a simple hash table
*------------------------------------------------------------*/
typedef struct gk_HTable_t {
int nelements; /* The overall size of the hash-table */
int htsize; /* The current size of the hash-table */
gk_ikv_t *harray; /* The actual hash-table */
} gk_HTable_t;
/*------------------------------------------------------------
* This structure implements a gk_Tokens_t list returned by the
* string tokenizer
*------------------------------------------------------------*/
typedef struct gk_Tokens_t {
int ntoks; /* The number of tokens in the input string */
char *strbuf; /* The memory that stores all the entries */
char **list; /* Pointers to the strbuf for each element */
} gk_Tokens_t;
/*------------------------------------------------------------
* This structure implements storage for an atom in a pdb file
*------------------------------------------------------------*/
typedef struct atom {
int serial;
char *name;
char altLoc;
char *resname;
char chainid;
int rserial;
char icode;
char element;
double x;
double y;
double z;
double opcy;
double tmpt;
} atom;
/*------------------------------------------------------------
* This structure implements storage for a center of mass for
* a single residue.
*------------------------------------------------------------*/
typedef struct center_of_mass {
char name;
double x;
double y;
double z;
} center_of_mass;
/*------------------------------------------------------------
* This structure implements storage for a pdb protein
*------------------------------------------------------------*/
typedef struct pdbf {
int natoms; /* Number of atoms */
int nresidues; /* Number of residues based on coordinates */
int ncas;
int nbbs;
int corruption;
char *resSeq; /* Residue sequence based on coordinates */
char **threeresSeq; /* three-letter residue sequence */
atom *atoms;
atom **bbs;
atom **cas;
center_of_mass *cm;
} pdbf;
/*************************************************************
* Localization Structures for converting characters to integers
**************************************************************/
typedef struct gk_i2cc2i_t {
int n;
char *i2c;
int *c2i;
} gk_i2cc2i_t;
/*******************************************************************
*This structure implements storage of a protein sequence
* *****************************************************************/
typedef struct gk_seq_t {
int len; /*Number of Residues */
int *sequence; /* Stores the sequence*/
int **pssm; /* Stores the pssm matrix */
int **psfm; /* Stores the psfm matrix */
char *name; /* Stores the name of the sequence */
int nsymbols;
} gk_seq_t;
/*************************************************************************/
/*! The following data structure stores information about a memory
allocation operation that can either be served from gk_mcore_t or by
a gk_malloc if not sufficient workspace memory is available. */
/*************************************************************************/
typedef struct gk_mop_t {
int type;
ssize_t nbytes;
void *ptr;
} gk_mop_t;
/*************************************************************************/
/*! The following structure defines the mcore for GKlib's customized
memory allocations. */
/*************************************************************************/
typedef struct gk_mcore_t {
/* Workspace information */
size_t coresize; /*!< The amount of core memory that has been allocated */
size_t corecpos; /*!< Index of the first free location in core */
void *core; /*!< Pointer to the core itself */
/* These are for implementing a stack-based allocation scheme using both
core and also dynamically allocated memory */
size_t nmops; /*!< The number of gk_mop_t entries that have been allocated */
size_t cmop; /*!< Index of the first free location in mops */
gk_mop_t *mops; /*!< The array recording the gk_mop_t operations */
/* These are for keeping various statistics for wspacemalloc */
size_t num_callocs; /*!< The number of core mallocs */
size_t num_hallocs; /*!< The number of heap mallocs */
size_t size_callocs; /*!< The total # of bytes in core mallocs */
size_t size_hallocs; /*!< The total # of bytes in heap mallocs */
size_t cur_callocs; /*!< The current # of bytes in core mallocs */
size_t cur_hallocs; /*!< The current # of bytes in heap mallocs */
size_t max_callocs; /*!< The maximum # of bytes in core mallocs at any given time */
size_t max_hallocs; /*!< The maximum # of bytes in heap mallocs at any given time */
} gk_mcore_t;
/*************************************************************************/
/*! The following structure is used for cache simulation for performance
modeling and analysis. */
/*************************************************************************/
typedef struct gk_cache_t {
/*! The total cache is nway*(2^(cnbits+lnbits)) bytes */
uint32_t nway; /*!< the associativity of the cache */
uint32_t lnbits; /*!< the number of address bits indexing the cache line */
uint32_t cnbits; /*!< the number of address bits indexing the cache */
size_t csize; /*!< 2^cnbits */
size_t cmask; /*!< csize-1 */
uint64_t clock; /*!< a clock in terms of accesses */
uint64_t *latimes; /*!< a cacheline-level last access time */
size_t *clines; /*!< the cache in terms of cachelines */
uint64_t nhits; /*!< counts the number of hits */
uint64_t nmisses; /*!< counts the number of misses */
} gk_cache_t;
#endif

@ -0,0 +1,38 @@
/*!
\file gk_types.h
\brief This file contains basic scalar datatype used in GKlib
\date Started 3/27/2007
\author George
\version\verbatim $Id: gk_types.h 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
*/
#ifndef _GK_TYPES_H_
#define _GK_TYPES_H_
/*************************************************************************
* Basic data type definitions. These definitions allow GKlib to separate
* the following elemental types:
* - index/loop iterator variables (gk_idx_t), which are set to ssize_t
* - signed and unsigned int variables that can be set to any # of bits
* - signed and unsigned long variables that can be set to any # of bits
* - real variables, which can be set to single or double precision.
**************************************************************************/
/*typedef ptrdiff_t gk_idx_t; */ /* index variable */
typedef ssize_t gk_idx_t; /* index variable */
typedef int32_t gk_int_t; /* integer values */
typedef uint32_t gk_uint_t; /* unsigned integer values */
typedef int64_t gk_long_t; /* long integer values */
typedef uint64_t gk_ulong_t; /* unsigned long integer values */
typedef float gk_real_t; /* real type */
typedef double gk_dreal_t; /* double precision real type */
typedef double gk_wclock_t; /* wall-clock time */
/*#define GK_IDX_MAX PTRDIFF_MAX*/
/* Upper bound for gk_idx_t values: half of SIZE_MAX, minus 2 */
#define GK_IDX_MAX ((SIZE_MAX>>1)-2)
/* printf and scanf conversion strings for gk_idx_t: the "z" length modifier
   followed by the signed decimal conversion "d" */
#define PRIGKIDX "zd"
#define SCNGKIDX "zd"
#endif

@ -0,0 +1,107 @@
/*!
\file gk_util.c
\brief Various utility routines
\date Started 4/12/2007
\author George
\version\verbatim $Id: gk_util.c 16223 2014-02-15 21:34:09Z karypis $ \endverbatim
*/
#include <GKlib.h>
/*************************************************************************
* Randomly shuffles the n entries of p[] in place by performing n/2 swaps,
* each between two positions drawn with RandomInRange(n).
*   flag == 1       : p[] is first initialized to the identity (p[i] = i)
*   any other value : the existing contents of p[] are shuffled as they are
**************************************************************************/
void gk_RandomPermute(size_t n, int *p, int flag)
{
  size_t k, first, second;
  int held;

  /* Optionally start from the identity permutation */
  if (flag == 1) {
    k = 0;
    while (k < n) {
      p[k] = k;
      k++;
    }
  }

  /* n/2 random pairwise exchanges; the two positions are drawn in the same
     order in which they are passed to the swap */
  for (k = n/2; k > 0; k--) {
    first = RandomInRange(n);
    second = RandomInRange(n);
    gk_SWAP(p[first], p[second], held);
  }
}
/************************************************************************/
/*!
\brief Converts an element-based set membership into a CSR-format set-based
membership.
For example, it takes an array such as part[] that stores where each
element belongs to and returns a pair of arrays (pptr[], pind[]) that
store in CSR format the list of elements belonging in each partition.
\param n
the number of elements in the array (e.g., # of vertices)
\param range
the cardinality of the set (e.g., # of partitions)
\param array
the array that stores the per-element set membership. Its entries are
used directly as indices into ptr without any bounds check.
\param ptr
the array that will store the starting indices in ind for
the elements of each set. This is filled by the routine and
its size should be at least range+1.
\param ind
the array that stores consecutively which elements belong to
each set. The size of this array should be n.
*/
/************************************************************************/
void gk_array2csr(size_t n, size_t range, int *array, int *ptr, int *ind)
{
size_t i;
/* Count the number of elements in each set; gk_iset presumably zeroes the
first range+1 entries of ptr beforehand -- confirm */
gk_iset(range+1, 0, ptr);
for (i=0; i<n; i++)
ptr[array[i]]++;
/* Compute the ptr, ind structure */
/* NOTE(review): MAKECSR and SHIFTCSR are macros defined outside this file;
presumably MAKECSR turns the counts into starting offsets and SHIFTCSR
restores those offsets after the fill loop has advanced them -- confirm */
MAKECSR(i, range, ptr);
/* Place each element in the next free slot of its set, advancing that
set's offset as it goes */
for (i=0; i<n; i++)
ind[ptr[array[i]]++] = i;
SHIFTCSR(i, range, ptr);
}
/*************************************************************************
* Returns the integer (floor) base-2 logarithm of a, i.e., the number of
* times a can be halved before it drops to 1. Any a <= 1 yields 0.
**************************************************************************/
int gk_log2(int a)
{
  int nhalvings = 0;

  while (a > 1) {
    a >>= 1;
    nhalvings++;
  }

  return nhalvings;
}
/*************************************************************************
* Returns 1 if a is a power of two and 0 otherwise, by comparing a against
* 2 raised to gk_log2(a).
**************************************************************************/
int gk_ispow2(int a)
{
  int candidate = 1 << gk_log2(a);

  return (candidate == a);
}
/*************************************************************************
* This function returns the log2(x)
**************************************************************************/
float gk_flog2(float a)
{
return log(a)/log(2.0);
}

10704
gkregex.c

File diff suppressed because it is too large Load Diff

@ -0,0 +1,556 @@
/* Definitions for data structures and routines for the regular
expression library.
Copyright (C) 1985,1989-93,1995-98,2000,2001,2002,2003,2005,2006
Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
#ifndef _REGEX_H
#define _REGEX_H 1
#include <sys/types.h>
/* Allow the use in C++ code. */
#ifdef __cplusplus
extern "C" {
#endif
/* The following two types have to be signed and unsigned integer type
wide enough to hold a value of a pointer. For most ANSI compilers
ptrdiff_t and size_t should be likely OK. Still size of these two
types is 2 for Microsoft C. Ugh... */
typedef long int s_reg_t;
typedef unsigned long int active_reg_t;
/* The following bits are used to determine the regexp syntax we
recognize. The set/not-set meanings are chosen so that Emacs syntax
remains the value 0. The bits are given in alphabetical order, and
the definitions shifted by one from the previous bit; thus, when we
add or remove a bit, only one other definition need change. */
typedef unsigned long int reg_syntax_t;
/* If this bit is not set, then \ inside a bracket expression is literal.
If set, then such a \ quotes the following character. */
#define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1)
/* If this bit is not set, then + and ? are operators, and \+ and \? are
literals.
If set, then \+ and \? are operators and + and ? are literals. */
#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
/* If this bit is set, then character classes are supported. They are:
[:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:],
[:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
If not set, then character classes are not supported. */
#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
/* If this bit is set, then ^ and $ are always anchors (outside bracket
expressions, of course).
If this bit is not set, then it depends:
^ is an anchor if it is at the beginning of a regular
expression or after an open-group or an alternation operator;
$ is an anchor if it is at the end of a regular expression, or
before a close-group or an alternation operator.
This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
POSIX draft 11.2 says that * etc. in leading positions is undefined.
We already implemented a previous draft which made those constructs
invalid, though, so we haven't changed the code back. */
#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
/* If this bit is set, then special characters are always special
regardless of where they are in the pattern.
If this bit is not set, then special characters are special only in
some contexts; otherwise they are ordinary. Specifically,
* + ? and intervals are only special when not after the beginning,
open-group, or alternation operator. */
#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
/* If this bit is set, then *, +, ?, and { cannot be first in an re or
immediately after an alternation or begin-group operator. */
#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
/* If this bit is set, then . matches newline.
If not set, then it doesn't. */
#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
/* If this bit is set, then . doesn't match NUL.
If not set, then it does. */
#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
/* If this bit is set, nonmatching lists [^...] do not match newline.
If not set, they do. */
#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
/* If this bit is set, either \{...\} or {...} defines an
interval, depending on RE_NO_BK_BRACES.
If not set, \{, \}, {, and } are literals. */
#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
/* If this bit is set, +, ? and | aren't recognized as operators.
If not set, they are. */
#define RE_LIMITED_OPS (RE_INTERVALS << 1)
/* If this bit is set, newline is an alternation operator.
If not set, newline is literal. */
#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
/* If this bit is set, then `{...}' defines an interval, and \{ and \}
are literals.
If not set, then `\{...\}' defines an interval. */
#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
/* If this bit is set, (...) defines a group, and \( and \) are literals.
If not set, \(...\) defines a group, and ( and ) are literals. */
#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
/* If this bit is set, then \<digit> matches <digit>.
If not set, then \<digit> is a back-reference. */
#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
/* If this bit is set, then | is an alternation operator, and \| is literal.
If not set, then \| is an alternation operator, and | is literal. */
#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
/* If this bit is set, then an ending range point collating higher
than the starting range point, as in [z-a], is invalid.
If not set, then when ending range point collates higher than the
starting range point, the range is ignored. */
#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
/* If this bit is set, then an unmatched ) is ordinary.
If not set, then an unmatched ) is invalid. */
#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
/* If this bit is set, succeed as soon as we match the whole pattern,
without further backtracking. */
#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)
/* If this bit is set, do not process the GNU regex operators.
If not set, then the GNU regex operators are recognized. */
#define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)
/* If this bit is set, turn on internal regex debugging.
If not set, and debugging was on, turn it off.
This only works if regex.c is compiled -DDEBUG.
We define this bit always, so that all that's needed to turn on
debugging is to recompile regex.c; the calling code can always have
this bit set, and it won't affect anything in the normal case. */
#define RE_DEBUG (RE_NO_GNU_OPS << 1)
/* If this bit is set, a syntactically invalid interval is treated as
a string of ordinary characters. For example, the ERE 'a{1' is
treated as 'a\{1'. */
#define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1)
/* If this bit is set, then ignore case when matching.
If not set, then case is significant. */
#define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1)
/* This bit is used internally like RE_CONTEXT_INDEP_ANCHORS but only
for ^, because it is difficult to scan the regex backwards to find
whether ^ should be special. */
#define RE_CARET_ANCHORS_HERE (RE_ICASE << 1)
/* If this bit is set, then \{ cannot be first in a BRE or
immediately after an alternation or begin-group operator. */
#define RE_CONTEXT_INVALID_DUP (RE_CARET_ANCHORS_HERE << 1)
/* If this bit is set, then no_sub will be set to 1 during
re_compile_pattern. */
#define RE_NO_SUB (RE_CONTEXT_INVALID_DUP << 1)
/* This global variable defines the particular regexp syntax to use (for
some interfaces). When a regexp is compiled, the syntax used is
stored in the pattern buffer, so changing this does not affect
already-compiled regexps. */
extern reg_syntax_t re_syntax_options;
/* Define combinations of the above bits for the standard possibilities.
(The [[[ comments delimit what gets put into the Texinfo file, so
don't delete them!) */
/* [[[begin syntaxes]]] */
#define RE_SYNTAX_EMACS 0
#define RE_SYNTAX_AWK \
(RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \
| RE_NO_BK_PARENS | RE_NO_BK_REFS \
| RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \
| RE_DOT_NEWLINE | RE_CONTEXT_INDEP_ANCHORS \
| RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)
#define RE_SYNTAX_GNU_AWK \
((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG) \
& ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS \
| RE_CONTEXT_INVALID_OPS ))
#define RE_SYNTAX_POSIX_AWK \
(RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \
| RE_INTERVALS | RE_NO_GNU_OPS)
#define RE_SYNTAX_GREP \
(RE_BK_PLUS_QM | RE_CHAR_CLASSES \
| RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \
| RE_NEWLINE_ALT)
#define RE_SYNTAX_EGREP \
(RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \
| RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \
| RE_NEWLINE_ALT | RE_NO_BK_PARENS \
| RE_NO_BK_VBAR)
#define RE_SYNTAX_POSIX_EGREP \
(RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES \
| RE_INVALID_INTERVAL_ORD)
/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */
#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
/* Syntax bits common to both basic and extended POSIX regex syntax. */
#define _RE_SYNTAX_POSIX_COMMON \
(RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \
| RE_INTERVALS | RE_NO_EMPTY_RANGES)
#define RE_SYNTAX_POSIX_BASIC \
(_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM | RE_CONTEXT_INVALID_DUP)
/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this
isn't minimal, since other operators, such as \`, aren't disabled. */
#define RE_SYNTAX_POSIX_MINIMAL_BASIC \
(_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
#define RE_SYNTAX_POSIX_EXTENDED \
(_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
| RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \
| RE_NO_BK_PARENS | RE_NO_BK_VBAR \
| RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD)
/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is
removed and RE_NO_BK_REFS is added. */
#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \
(_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
| RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \
| RE_NO_BK_PARENS | RE_NO_BK_REFS \
| RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD)
/* [[[end syntaxes]]] */
/* Maximum number of duplicates an interval can allow. Some systems
(erroneously) define this in other header files, but we want our
value, so remove any previous define. */
#ifdef RE_DUP_MAX
# undef RE_DUP_MAX
#endif
/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows. */
#define RE_DUP_MAX (0x7fff)
/* POSIX `cflags' bits (i.e., information for `regcomp'). */
/* If this bit is set, then use extended regular expression syntax.
If not set, then use basic regular expression syntax. */
#define REG_EXTENDED 1
/* If this bit is set, then ignore case when matching.
If not set, then case is significant. */
#define REG_ICASE (REG_EXTENDED << 1)
/* If this bit is set, then anchors do not match at newline
characters in the string.
If not set, then anchors do match at newlines. */
#define REG_NEWLINE (REG_ICASE << 1)
/* If this bit is set, then report only success or fail in regexec.
If not set, then returns differ between not matching and errors. */
#define REG_NOSUB (REG_NEWLINE << 1)
/* POSIX `eflags' bits (i.e., information for regexec). */
/* If this bit is set, then the beginning-of-line operator doesn't match
the beginning of the string (presumably because it's not the
beginning of a line).
If not set, then the beginning-of-line operator does match the
beginning of the string. */
#define REG_NOTBOL 1
/* Like REG_NOTBOL, except for the end-of-line. */
#define REG_NOTEOL (1 << 1)
/* Use PMATCH[0] to delimit the start and end of the search in the
buffer. */
#define REG_STARTEND (1 << 2)
/* If any error codes are removed, changed, or added, update the
`re_error_msg' table in regex.c. */
/* Error codes used by the regex routines; REG_NOERROR (0) means success. */
typedef enum
{
#ifdef _XOPEN_SOURCE
REG_ENOSYS = -1, /* This will never happen for this implementation. */
#endif
REG_NOERROR = 0, /* Success. */
REG_NOMATCH, /* Didn't find a match (for regexec). */
/* POSIX regcomp return error codes. (In the order listed in the
standard.) */
REG_BADPAT, /* Invalid pattern. */
REG_ECOLLATE, /* Invalid collating element. */
REG_ECTYPE, /* Invalid character class name. */
REG_EESCAPE, /* Trailing backslash. */
REG_ESUBREG, /* Invalid back reference. */
REG_EBRACK, /* Unmatched left bracket. */
REG_EPAREN, /* Parenthesis imbalance. */
REG_EBRACE, /* Unmatched \{. */
REG_BADBR, /* Invalid contents of \{\}. */
REG_ERANGE, /* Invalid range end. */
REG_ESPACE, /* Ran out of memory. */
REG_BADRPT, /* No preceding re for repetition op. */
/* Error codes we've added. */
REG_EEND, /* Premature end. */
REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */
REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */
} reg_errcode_t;
/* This data structure represents a compiled pattern. Before calling
the pattern compiler, the fields `buffer', `allocated', `fastmap',
`translate', and `no_sub' can be set. After the pattern has been
compiled, the `re_nsub' field is available. All other fields are
private to the regex routines. */
#ifndef RE_TRANSLATE_TYPE
# define RE_TRANSLATE_TYPE unsigned char *
#endif
struct re_pattern_buffer
{
/* Space that holds the compiled pattern. It is declared as
`unsigned char *' because its elements are sometimes used as
array indexes. */
unsigned char *buffer;
/* Number of bytes to which `buffer' points. */
unsigned long int allocated;
/* Number of bytes actually used in `buffer'. */
unsigned long int used;
/* Syntax setting with which the pattern was compiled. */
reg_syntax_t syntax;
/* Pointer to a fastmap, if any, otherwise zero. re_search uses the
fastmap, if there is one, to skip over impossible starting points
for matches. */
char *fastmap;
/* Either a translate table to apply to all characters before
comparing them, or zero for no translation. The translation is
applied to a pattern when it is compiled and to a string when it
is matched. */
RE_TRANSLATE_TYPE translate;
/* Number of subexpressions found by the compiler. */
size_t re_nsub;
/* Zero if this pattern cannot match the empty string, one else.
Well, in truth it's used only in `re_search_2', to see whether or
not we should use the fastmap, so we don't set this absolutely
perfectly; see `re_compile_fastmap' (the `duplicate' case). */
unsigned can_be_null : 1;
/* If REGS_UNALLOCATED, allocate space in the `regs' structure
for `max (RE_NREGS, re_nsub + 1)' groups.
If REGS_REALLOCATE, reallocate space if necessary.
If REGS_FIXED, use what's there. */
#define REGS_UNALLOCATED 0
#define REGS_REALLOCATE 1
#define REGS_FIXED 2
unsigned regs_allocated : 2;
/* Set to zero when `regex_compile' compiles a pattern; set to one
by `re_compile_fastmap' if it updates the fastmap. */
unsigned fastmap_accurate : 1;
/* If set, `re_match_2' does not return information about
subexpressions. */
unsigned no_sub : 1;
/* If set, a beginning-of-line anchor doesn't match at the beginning
of the string. */
unsigned not_bol : 1;
/* Similarly for an end-of-line anchor. */
unsigned not_eol : 1;
/* If true, an anchor at a newline matches. */
unsigned newline_anchor : 1;
};
typedef struct re_pattern_buffer regex_t;
/* Type for byte offsets within the string. POSIX mandates this. */
typedef int regoff_t;
/* This is the structure we store register match data in. See
regex.texinfo for a full description of what registers match. */
struct re_registers
{
unsigned num_regs;
regoff_t *start;
regoff_t *end;
};
/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
`re_match_2' returns information about at least this many registers
the first time a `regs' structure is passed. */
#ifndef RE_NREGS
# define RE_NREGS 30
#endif
/* POSIX specification for registers. Aside from the different names than
`re_registers', POSIX uses an array of structures, instead of a
structure of arrays. */
typedef struct
{
regoff_t rm_so; /* Byte offset from string's start to substring's start. */
regoff_t rm_eo; /* Byte offset from string's start to substring's end. */
} regmatch_t;
/* Declarations for routines. */
/* Sets the current default syntax to SYNTAX, and return the old syntax.
You can also simply assign to the `re_syntax_options' variable. */
extern reg_syntax_t re_set_syntax (reg_syntax_t __syntax);
/* Compile the regular expression PATTERN, with length LENGTH
and syntax given by the global `re_syntax_options', into the buffer
BUFFER. Return NULL if successful, and an error string if not. */
extern const char *re_compile_pattern (const char *__pattern, size_t __length,
struct re_pattern_buffer *__buffer);
/* Compile a fastmap for the compiled pattern in BUFFER; used to
accelerate searches. Return 0 if successful and -2 if there was an
internal error. */
extern int re_compile_fastmap (struct re_pattern_buffer *__buffer);
/* Search in the string STRING (with length LENGTH) for the pattern
compiled into BUFFER. Start searching at position START, for RANGE
characters. Return the starting position of the match, -1 for no
match, or -2 for an internal error. Also return register
information in REGS (if REGS and BUFFER->no_sub are nonzero). */
extern int re_search (struct re_pattern_buffer *__buffer, const char *__string,
int __length, int __start, int __range,
struct re_registers *__regs);
/* Like `re_search', but search in the concatenation of STRING1 and
STRING2. Also, stop searching at index START + STOP. */
extern int re_search_2 (struct re_pattern_buffer *__buffer,
const char *__string1, int __length1,
const char *__string2, int __length2, int __start,
int __range, struct re_registers *__regs, int __stop);
/* Like `re_search', but return how many characters in STRING the regexp
in BUFFER matched, starting at position START. */
extern int re_match (struct re_pattern_buffer *__buffer, const char *__string,
int __length, int __start, struct re_registers *__regs);
/* Relates to `re_match' as `re_search_2' relates to `re_search'. */
extern int re_match_2 (struct re_pattern_buffer *__buffer,
const char *__string1, int __length1,
const char *__string2, int __length2, int __start,
struct re_registers *__regs, int __stop);
/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
ENDS. Subsequent matches using BUFFER and REGS will use this memory
for recording register information. STARTS and ENDS must be
allocated with malloc, and must each be at least `NUM_REGS * sizeof
(regoff_t)' bytes long.
If NUM_REGS == 0, then subsequent matches should allocate their own
register data.
Unless this function is called, the first search or match using
PATTERN_BUFFER will allocate its own register data, without
freeing the old data. */
extern void re_set_registers (struct re_pattern_buffer *__buffer,
struct re_registers *__regs,
unsigned int __num_regs,
regoff_t *__starts, regoff_t *__ends);
#if defined _REGEX_RE_COMP || defined _LIBC
# ifndef _CRAY
/* 4.2 bsd compatibility. */
extern char *re_comp (const char *);
extern int re_exec (const char *);
# endif
#endif
/* GCC 2.95 and later have "__restrict"; C99 compilers have
"restrict", and "configure" may have defined "restrict". */
#ifndef __restrict
# if ! (2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__))
# if defined restrict || 199901L <= __STDC_VERSION__
# define __restrict restrict
# else
# define __restrict
# endif
# endif
#endif
/* gcc 3.1 and up support the [restrict] syntax. */
#ifndef __restrict_arr
# if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) \
&& !defined __GNUG__
# define __restrict_arr __restrict
# else
# define __restrict_arr
# endif
#endif
/* POSIX compatibility. */
extern int regcomp (regex_t *__restrict __preg,
const char *__restrict __pattern,
int __cflags);
extern int regexec (const regex_t *__restrict __preg,
const char *__restrict __string, size_t __nmatch,
regmatch_t __pmatch[__restrict_arr],
int __eflags);
extern size_t regerror (int __errcode, const regex_t *__restrict __preg,
char *__restrict __errbuf, size_t __errbuf_size);
extern void regfree (regex_t *__preg);
#ifdef __cplusplus
}
#endif /* C++ */
#endif /* regex.h */

1940
graph.c

File diff suppressed because it is too large Load Diff

@ -0,0 +1,247 @@
/*
* Copyright 2004, Regents of the University of Minnesota
*
* This file contains routines for manipulating a direct-access hash table
*
* Started 3/22/04
* George
*
*/
#include <GKlib.h>
/******************************************************************************
* Allocates a hash-table with nelements slots and returns it with every slot
* marked empty and an entry count of zero.
*******************************************************************************/
gk_HTable_t *HTable_Create(int nelements)
{
  gk_HTable_t *table = gk_malloc(sizeof(gk_HTable_t), "HTable_Create: htable");

  table->nelements = nelements;
  table->harray    = gk_ikvmalloc(nelements, "HTable_Create: harray");

  /* marks every slot as HTABLE_EMPTY and zeroes htsize */
  HTable_Reset(table);

  return table;
}
/******************************************************************************
* Empties the hash-table: every slot's key becomes HTABLE_EMPTY and the entry
* count goes back to zero. The slot array itself is kept.
*******************************************************************************/
void HTable_Reset(gk_HTable_t *htable)
{
  int slot = 0;

  htable->htsize = 0;

  while (slot < htable->nelements) {
    htable->harray[slot].key = HTABLE_EMPTY;
    slot++;
  }
}
/******************************************************************************
* This function resizes the hash-table to nelements slots.
*
* A fresh slot array is allocated and every live entry of the old array is
* re-inserted into it, after which the old array is freed. Slots holding
* HTABLE_EMPTY or HTABLE_DELETED carry no entry and are not copied, so the
* tombstones left by deletions are dropped by a resize.
*******************************************************************************/
void HTable_Resize(gk_HTable_t *htable, int nelements)
{
  int i, old_nelements;
  gk_ikv_t *old_harray;

  old_nelements = htable->nelements;
  old_harray    = htable->harray;

  /* install the new, empty slot array */
  htable->nelements = nelements;
  htable->htsize    = 0;
  htable->harray    = gk_ikvmalloc(nelements, "HTable_Resize: harray");
  for (i=0; i<nelements; i++)
    htable->harray[i].key = HTABLE_EMPTY;

  /* re-hash the live entries only; re-inserting a tombstone would store the
     HTABLE_DELETED sentinel as if it were a key and inflate htsize */
  for (i=0; i<old_nelements; i++) {
    if (old_harray[i].key == HTABLE_EMPTY || old_harray[i].key == HTABLE_DELETED)
      continue;
    HTable_Insert(htable, old_harray[i].key, old_harray[i].val);
  }

  /* remove old harray */
  gk_free((void **)&old_harray, LTERM);
}
/******************************************************************************
* Stores the (key, val) pair in the first free slot found by linear probing
* from the key's home slot. The table is doubled beforehand when more than
* half of its slots hold entries. If no free slot is found nothing is stored.
*******************************************************************************/
void HTable_Insert(gk_HTable_t *htable, int key, int val)
{
  int probe, slot, home;

  if (htable->htsize > htable->nelements/2)
    HTable_Resize(htable, 2*htable->nelements);

  home = HTable_HFunction(htable->nelements, key);

  /* visit home, home+1, ..., nelements-1, 0, ..., home-1 */
  for (probe=0; probe<htable->nelements; probe++) {
    slot = home + probe;
    if (slot >= htable->nelements)
      slot -= htable->nelements;

    /* occupied slots are skipped; empty slots and tombstones can be reused */
    if (htable->harray[slot].key != HTABLE_EMPTY && htable->harray[slot].key != HTABLE_DELETED)
      continue;

    htable->harray[slot].key = key;
    htable->harray[slot].val = val;
    htable->htsize++;
    return;
  }
}
/******************************************************************************
* This function deletes key from the htable.
*
* The slot holding the first occurrence of key along its probe sequence is
* turned into a tombstone (HTABLE_DELETED) and the entry count is decremented.
* If key is not stored the table is left untouched.
*
* The probe stops at the first HTABLE_EMPTY slot, as HTable_Search does.
* HTable_Insert always takes the first empty or deleted slot on the probe
* path, so a stored key can never lie behind an empty slot and a miss does
* not have to scan the whole table.
*******************************************************************************/
void HTable_Delete(gk_HTable_t *htable, int key)
{
  int probe, slot, home;

  home = HTable_HFunction(htable->nelements, key);

  /* visit home, home+1, ..., nelements-1, 0, ..., home-1 */
  for (probe=0; probe<htable->nelements; probe++) {
    slot = home + probe;
    if (slot >= htable->nelements)
      slot -= htable->nelements;

    if (htable->harray[slot].key == key) {
      htable->harray[slot].key = HTABLE_DELETED;
      htable->htsize--;
      return;
    }

    /* end of the probe chain: the key is not in the table */
    if (htable->harray[slot].key == HTABLE_EMPTY)
      return;
  }
}
/******************************************************************************
* Looks up key and returns the value stored with it. Returns -1 when an empty
* slot is reached first or when every slot has been probed without a match.
*******************************************************************************/
int HTable_Search(gk_HTable_t *htable, int key)
{
  int probe, slot, home;

  home = HTable_HFunction(htable->nelements, key);

  /* visit home, home+1, ..., nelements-1, 0, ..., home-1 */
  for (probe=0; probe<htable->nelements; probe++) {
    slot = home + probe;
    if (slot >= htable->nelements)
      slot -= htable->nelements;

    if (htable->harray[slot].key == key)
      return htable->harray[slot].val;

    /* an empty slot ends the probe chain */
    if (htable->harray[slot].key == HTABLE_EMPTY)
      break;
  }

  return -1;
}
/******************************************************************************
* This function iterates over the values stored under key.
*
* Call it with type == HTABLE_FIRST to start an iteration; the probe then
* begins at the key's home slot. Any other type continues the iteration
* started by the last HTABLE_FIRST call.
*
* Returns 1 and stores the value in *r_val when another entry with this key
* is found. Returns -1 when an empty slot ends the probe chain or when every
* slot has been visited once.
*
* The iteration position lives in static variables, so only one iteration can
* be in progress at a time and the function is not reentrant. The table should
* not be resized while an iteration is in progress.
*******************************************************************************/
int HTable_GetNext(gk_HTable_t *htable, int key, int *r_val, int type)
{
  int i;
  /* next slot to probe and number of slots not yet probed in this iteration */
  static int next = 0, remaining = 0;

  if (type == HTABLE_FIRST) {
    next      = HTable_HFunction(htable->nelements, key);
    remaining = htable->nelements;
  }

  while (remaining > 0) {
    i = next;

    /* advance with wrap-around so that each slot is visited exactly once */
    next = (next+1 < htable->nelements ? next+1 : 0);
    remaining--;

    if (htable->harray[i].key == key) {
      *r_val = htable->harray[i].val;
      return 1;
    }

    if (htable->harray[i].key == HTABLE_EMPTY) {
      /* end of the probe chain; later calls keep reporting "no more" */
      remaining = 0;
      return -1;
    }
  }

  return -1;
}
/******************************************************************************
* Looks up key, turns its slot into a tombstone (HTABLE_DELETED), decrements
* the entry count and returns the value that was stored with the key.
* gk_errexit() is called when an empty slot is reached before the key; -1 is
* returned when the probe ends without a match.
*******************************************************************************/
int HTable_SearchAndDelete(gk_HTable_t *htable, int key)
{
  int probe, slot, home;

  home = HTable_HFunction(htable->nelements, key);

  /* visit home, home+1, ..., nelements-1, 0, ..., home-1 */
  for (probe=0; probe<htable->nelements; probe++) {
    slot = home + probe;
    if (slot >= htable->nelements)
      slot -= htable->nelements;

    if (htable->harray[slot].key == key) {
      htable->harray[slot].key = HTABLE_DELETED;
      htable->htsize--;
      return htable->harray[slot].val;
    }

    if (htable->harray[slot].key == HTABLE_EMPTY)
      gk_errexit(SIGERR, "HTable_SearchAndDelete: Failed to find the key!\n");
  }

  return -1;
}
/******************************************************************************
* Releases the slot array of the hash-table and then the table itself.
*******************************************************************************/
void HTable_Destroy(gk_HTable_t *htable)
{
  /* the slot array goes first, it is reached through the table */
  gk_free((void **)&htable->harray, LTERM);
  gk_free((void **)&htable, LTERM);
}
/******************************************************************************
* This is the hash-function. It maps key to a slot index by taking the
* remainder of key divided by nelements.
*
* nelements must be positive. The result is always in [0, nelements), also for
* negative keys, so it can be used directly as an index into the slot array.
*******************************************************************************/
int HTable_HFunction(int nelements, int key)
{
  int slot = key%nelements;

  /* in C the result of % takes the sign of the dividend, so a negative key
     gives a negative remainder; shift it back into the valid index range */
  if (slot < 0)
    slot += nelements;

  return slot;
}

681
io.c

@ -0,0 +1,681 @@
/*!
\file io.c
\brief Various file I/O functions.
This file contains various functions that perform I/O.
\date Started 4/10/95
\author George
\version\verbatim $Id: io.c 18951 2015-08-08 20:10:46Z karypis $ \endverbatim
*/
#ifdef HAVE_GETLINE
/* Get getline to be defined. */
#define _GNU_SOURCE
#include <stdio.h>
#undef _GNU_SOURCE
#endif
#include <GKlib.h>
/*************************************************************************
* This function opens a file
*
* fname and mode are passed to fopen() unchanged. On success the open
* stream is returned. On failure a message built from fname, mode and msg
* is printed with perror() and errexit() is called; NULL is returned if
* errexit() comes back.
**************************************************************************/
FILE *gk_fopen(char *fname, char *mode, const char *msg)
{
  FILE *fp;
  char errmsg[8192];

  fp = fopen(fname, mode);
  if (fp != NULL)
    return fp;

  /* bounded formatting: fname and msg are caller-supplied and can be longer
     than the buffer, in which case the message is truncated */
  snprintf(errmsg, sizeof(errmsg), "file: %s, mode: %s, [%s]", fname, mode, msg);
  perror(errmsg);
  errexit("Failed on gk_fopen()\n");

  return NULL;
}
/*************************************************************************
* This function closes a file by calling fclose() on fp. The value
* returned by fclose() is discarded, so a failed close is not reported
* to the caller.
**************************************************************************/
void gk_fclose(FILE *fp)
{
fclose(fp);
}
/*************************************************************************/
/*! Wrapper around read() that keeps issuing read requests until 'count'
    bytes have been stored in vbuf or read() reports that nothing more is
    available.

    \param fd is the file descriptor to read from.
    \param vbuf is the destination buffer.
    \param count is the number of bytes requested.
    \returns the number of bytes stored in vbuf, or -1 as soon as a
             read() call returns -1.
*/
/*************************************************************************/
ssize_t gk_read(int fd, void *vbuf, size_t count)
{
  char *cursor = (char *)vbuf;
  ssize_t got, remaining = count;

  for (;;) {
    got = read(fd, cursor, remaining);
    if (got == -1)
      return -1;

    cursor    += got;
    remaining -= got;

    /* done when everything arrived or read() returned no data */
    if (remaining <= 0 || got <= 0)
      break;
  }

  return count-remaining;
}
/*************************************************************************/
/*! Wrapper around write() that keeps issuing write requests until all
    'count' bytes of vbuf have been handed to write().

    \param fd is the file descriptor to write to.
    \param vbuf is the source buffer.
    \param count is the number of bytes to write.
    \returns count, or -1 as soon as a write() call returns -1.
*/
/*************************************************************************/
ssize_t gk_write(int fd, void *vbuf, size_t count)
{
  char *cursor = (char *)vbuf;
  ssize_t put, remaining = count;

  for (;;) {
    put = write(fd, cursor, remaining);
    if (put == -1)
      return -1;

    cursor    += put;
    remaining -= put;

    if (remaining <= 0)
      break;
  }

  return count;
}
/*************************************************************************/
/*! This function is the GKlib implementation of glibc's getline()
    function. When HAVE_GETLINE is defined it simply calls getline().

    Otherwise it reads characters from stream up to and including the next
    newline (or up to EOF), stores them NUL-terminated in *lineptr and
    grows the buffer as needed, updating *n with the new size.

    \param lineptr points to the line buffer. If *lineptr is NULL or *n is
           0, a new buffer is allocated.
    \param n points to the size of the buffer in *lineptr.
    \param stream is the stream to read from.
    \returns -1 if the EOF has been reached, otherwise it returns the
             number of bytes read.
*/
/*************************************************************************/
ssize_t gk_getline(char **lineptr, size_t *n, FILE *stream)
{
#ifdef HAVE_GETLINE
  return getline(lineptr, n, stream);
#else
  size_t i;
  int ch;

  if (feof(stream))
    return -1;

  /* Initial memory allocation if *lineptr is NULL */
  if (*lineptr == NULL || *n == 0) {
    *n = 1024;
    *lineptr = gk_malloc((*n)*sizeof(char), "gk_getline: lineptr");
  }

  /* get into the main loop */
  i = 0;
  while ((ch = getc(stream)) != EOF) {
    (*lineptr)[i++] = (char)ch;

    /* reallocate memory if reached at the end of the buffer. The +1 is for '\0'.
       The test is >= so that a caller-supplied buffer of size 1, for which
       i+1 is already past *n after the first character, is grown as well. */
    if (i+1 >= *n) {
      *n = 2*(*n);
      *lineptr = gk_realloc(*lineptr, (*n)*sizeof(char), "gk_getline: lineptr");
    }

    if (ch == '\n')
      break;
  }
  (*lineptr)[i] = '\0';

  return (i == 0 ? -1 : i);
#endif
}
/*************************************************************************/
/*! This function reads the contents of a text file and returns it in the
    form of an array of strings, one per line, with trailing '\n' and '\r'
    characters pruned.

    The array is sized from the line count reported by gk_getfilestats()
    and the file is then read in a second pass. The second pass never
    stores more lines than were allocated, so a disagreement between the
    two passes (for example a file that grew in between) cannot write past
    the end of the array.

    \param fname is the name of the file
    \param r_nlines is the number of lines in the file. If it is NULL,
           this information is not returned.
    \returns the array of lines, or NULL if gk_getfilestats() reported no
             lines.
*/
/*************************************************************************/
char **gk_readfile(char *fname, size_t *r_nlines)
{
  size_t lnlen=0, nlines=0, maxlines=0;
  char *line=NULL, **lines=NULL;
  FILE *fpin;

  gk_getfilestats(fname, &maxlines, NULL, NULL, NULL);
  if (maxlines > 0) {
    lines = (char **)gk_malloc(maxlines*sizeof(char *), "gk_readfile: lines");

    fpin = gk_fopen(fname, "r", "gk_readfile");
    /* the capacity check comes first so that no line is consumed and dropped */
    while (nlines < maxlines && gk_getline(&line, &lnlen, fpin) != -1) {
      gk_strtprune(line, "\n\r");
      lines[nlines++] = gk_strdup(line);
    }
    gk_fclose(fpin);
  }

  gk_free((void **)&line, LTERM);

  if (r_nlines != NULL)
    *r_nlines = nlines;

  return lines;
}
/*************************************************************************/
/*! This function reads the contents of a file and returns it in the
    form of an array of int32_t, taking one value from each line.

    The array is sized from the line count reported by gk_getfilestats()
    and the file is then read in a second pass that never stores more
    values than were allocated. The result of sscanf() is not checked, so
    the element of a line that does not parse is left as allocated.

    \param fname is the name of the file
    \param r_nlines is the number of lines in the file. If it is NULL,
           this information is not returned.
    \returns the array of values, or NULL if gk_getfilestats() reported no
             lines.
*/
/*************************************************************************/
int32_t *gk_i32readfile(char *fname, size_t *r_nlines)
{
  size_t lnlen=0, nlines=0, maxlines=0;
  char *line=NULL;
  int32_t *array=NULL;
  FILE *fpin;

  gk_getfilestats(fname, &maxlines, NULL, NULL, NULL);
  if (maxlines > 0) {
    array = gk_i32malloc(maxlines, "gk_i32readfile: array");

    /* the context string names this function so that an open failure is
       attributed to the right caller */
    fpin = gk_fopen(fname, "r", "gk_i32readfile");
    /* the capacity check comes first so that no line is consumed and dropped */
    while (nlines < maxlines && gk_getline(&line, &lnlen, fpin) != -1) {
      sscanf(line, "%"SCNd32, &array[nlines++]);
    }
    gk_fclose(fpin);
  }

  gk_free((void **)&line, LTERM);

  if (r_nlines != NULL)
    *r_nlines = nlines;

  return array;
}
/*************************************************************************/
/*! This function reads the contents of a file and returns it in the
    form of an array of int64_t, taking one value from each line.

    The array is sized from the line count reported by gk_getfilestats()
    and the file is then read in a second pass that never stores more
    values than were allocated. The result of sscanf() is not checked, so
    the element of a line that does not parse is left as allocated.

    \param fname is the name of the file
    \param r_nlines is the number of lines in the file. If it is NULL,
           this information is not returned.
    \returns the array of values, or NULL if gk_getfilestats() reported no
             lines.
*/
/*************************************************************************/
int64_t *gk_i64readfile(char *fname, size_t *r_nlines)
{
  size_t lnlen=0, nlines=0, maxlines=0;
  char *line=NULL;
  int64_t *array=NULL;
  FILE *fpin;

  gk_getfilestats(fname, &maxlines, NULL, NULL, NULL);
  if (maxlines > 0) {
    array = gk_i64malloc(maxlines, "gk_i64readfile: array");

    /* the context string names this function so that an open failure is
       attributed to the right caller */
    fpin = gk_fopen(fname, "r", "gk_i64readfile");
    /* the capacity check comes first so that no line is consumed and dropped */
    while (nlines < maxlines && gk_getline(&line, &lnlen, fpin) != -1) {
      sscanf(line, "%"SCNd64, &array[nlines++]);
    }
    gk_fclose(fpin);
  }

  gk_free((void **)&line, LTERM);

  if (r_nlines != NULL)
    *r_nlines = nlines;

  return array;
}
/*************************************************************************/
/*! This function reads the contents of a file and returns it in the
    form of an array of ssize_t, taking one value from each line.

    The array is sized from the line count reported by gk_getfilestats()
    and the file is then read in a second pass that never stores more
    values than were allocated. The result of sscanf() is not checked, so
    the element of a line that does not parse is left as allocated.

    \param fname is the name of the file
    \param r_nlines is the number of lines in the file. If it is NULL,
           this information is not returned.
    \returns the array of values, or NULL if gk_getfilestats() reported no
             lines.
*/
/*************************************************************************/
ssize_t *gk_zreadfile(char *fname, size_t *r_nlines)
{
  size_t lnlen=0, nlines=0, maxlines=0;
  char *line=NULL;
  ssize_t *array=NULL;
  FILE *fpin;

  gk_getfilestats(fname, &maxlines, NULL, NULL, NULL);
  if (maxlines > 0) {
    array = gk_zmalloc(maxlines, "gk_zreadfile: array");

    /* the context string names this function so that an open failure is
       attributed to the right caller */
    fpin = gk_fopen(fname, "r", "gk_zreadfile");
    /* the capacity check comes first so that no line is consumed and dropped */
    while (nlines < maxlines && gk_getline(&line, &lnlen, fpin) != -1) {
      sscanf(line, "%zd", &array[nlines++]);
    }
    gk_fclose(fpin);
  }

  gk_free((void **)&line, LTERM);

  if (r_nlines != NULL)
    *r_nlines = nlines;

  return array;
}
/*************************************************************************/
/*! This function reads the contents of a binary file and returns it in the
    form of an array of char.

    The size of the file is obtained with gk_getfsize() and the whole file
    is read with a single fread(). If the size cannot be obtained or fewer
    bytes than expected are read, gk_errexit() is called and NULL is
    returned if it comes back.

    \param fname is the name of the file
    \param r_nelmnts is the number of bytes read. It is set to 0 when the
           function fails. If it is NULL, this information is not
           returned.
    \returns the array holding the contents of the file, or NULL on
             failure.
*/
/*************************************************************************/
char *gk_creadfilebin(char *fname, size_t *r_nelmnts)
{
  size_t nelmnts;
  ssize_t fsize;
  char *array=NULL;
  FILE *fpin;

  if (r_nelmnts != NULL)
    *r_nelmnts = 0;

  fsize = gk_getfsize(fname);
  if (fsize == -1) {
    gk_errexit(SIGERR, "Failed to fstat(%s).\n", fname);
    return NULL;
  }

  nelmnts = fsize;
  array = gk_cmalloc(nelmnts, "gk_creadfilebin: array");

  fpin = gk_fopen(fname, "rb", "gk_creadfilebin");

  if (fread(array, sizeof(char), nelmnts, fpin) != nelmnts) {
    /* release the stream and the buffer before reporting, so that nothing
       is left behind whether or not gk_errexit() returns */
    gk_fclose(fpin);
    gk_free((void **)&array, LTERM);
    gk_errexit(SIGERR, "Failed to read the number of words requested. %zu\n", nelmnts);
    return NULL;
  }
  gk_fclose(fpin);

  if (r_nelmnts != NULL)
    *r_nelmnts = nelmnts;

  return array;
}
/*************************************************************************/
/*! Writes an array of char into a binary file, replacing any previous
    contents of that file.

    \param fname is the name of the file
    \param n is the number of elements in the array
    \param a is the array to be written out
    \returns the value returned by fwrite(), i.e., the number of elements
             that were written.
*/
/*************************************************************************/
size_t gk_cwritefilebin(char *fname, size_t n, char *a)
{
  FILE *fpout = gk_fopen(fname, "wb", "gk_writefilebin");
  size_t nwritten = fwrite(a, sizeof(char), n, fpout);

  gk_fclose(fpout);

  return nwritten;
}
/*************************************************************************/
/*! This function reads the contents of a binary file and returns it in the
    form of an array of int32_t.

    \param fname is the name of the file
    \param r_nelmnts is set to the number of int32_t elements that were
           read; it is set to 0 on failure. If it is NULL, this information
           is not returned.
    \returns the allocated array. NULL is returned on failure, should
             gk_errexit() return to the caller.
*/
/*************************************************************************/
int32_t *gk_i32readfilebin(char *fname, size_t *r_nelmnts)
{
  size_t nelmnts;
  ssize_t fsize;
  int32_t *array=NULL;
  FILE *fpin;

  if (r_nelmnts != NULL)
    *r_nelmnts = 0;

  fsize = gk_getfsize(fname);
  if (fsize == -1) {
    gk_errexit(SIGERR, "Failed to fstat(%s).\n", fname);
    return NULL;
  }

  /* The file must hold a whole number of elements. */
  if (fsize%sizeof(int32_t) != 0) {
    gk_errexit(SIGERR, "The size [%zd] of the file [%s] is not in multiples of sizeof(int32_t).\n", fsize, fname);
    return NULL;
  }

  nelmnts = fsize/sizeof(int32_t);
  array = gk_i32malloc(nelmnts, "gk_i32readfilebin: array");

  fpin = gk_fopen(fname, "rb", "gk_i32readfilebin");

  if (fread(array, sizeof(int32_t), nelmnts, fpin) != nelmnts) {
    /* Release the stream and the buffer before reporting, so that nothing
       is leaked regardless of how gk_errexit() leaves this function. */
    gk_fclose(fpin);
    gk_free((void **)&array, LTERM);
    /* nelmnts is a size_t, hence %zu */
    gk_errexit(SIGERR, "Failed to read the number of words requested. %zu\n", nelmnts);
    return NULL;
  }
  gk_fclose(fpin);

  if (r_nelmnts != NULL)
    *r_nelmnts = nelmnts;

  return array;
}
/*************************************************************************/
/*! Writes an array of int32_t into a binary file, replacing any previous
    contents of that file.

    \param fname is the name of the file
    \param n is the number of elements in the array
    \param a is the array to be written out
    \returns the value returned by fwrite(), i.e., the number of elements
             that were written.
*/
/*************************************************************************/
size_t gk_i32writefilebin(char *fname, size_t n, int32_t *a)
{
  FILE *fpout = gk_fopen(fname, "wb", "gk_writefilebin");
  size_t nwritten = fwrite(a, sizeof(int32_t), n, fpout);

  gk_fclose(fpout);

  return nwritten;
}
/*************************************************************************/
/*! This function reads the contents of a binary file and returns it in the
    form of an array of int64_t.

    \param fname is the name of the file
    \param r_nelmnts is set to the number of int64_t elements that were
           read; it is set to 0 on failure. If it is NULL, this information
           is not returned.
    \returns the allocated array. NULL is returned on failure, should
             gk_errexit() return to the caller.
*/
/*************************************************************************/
int64_t *gk_i64readfilebin(char *fname, size_t *r_nelmnts)
{
  size_t nelmnts;
  ssize_t fsize;
  int64_t *array=NULL;
  FILE *fpin;

  if (r_nelmnts != NULL)
    *r_nelmnts = 0;

  fsize = gk_getfsize(fname);
  if (fsize == -1) {
    gk_errexit(SIGERR, "Failed to fstat(%s).\n", fname);
    return NULL;
  }

  /* The file must hold a whole number of elements. */
  if (fsize%sizeof(int64_t) != 0) {
    gk_errexit(SIGERR, "The size of the file is not in multiples of sizeof(int64_t).\n");
    return NULL;
  }

  nelmnts = fsize/sizeof(int64_t);
  array = gk_i64malloc(nelmnts, "gk_i64readfilebin: array");

  fpin = gk_fopen(fname, "rb", "gk_i64readfilebin");

  if (fread(array, sizeof(int64_t), nelmnts, fpin) != nelmnts) {
    /* Release the stream and the buffer before reporting, so that nothing
       is leaked regardless of how gk_errexit() leaves this function. */
    gk_fclose(fpin);
    gk_free((void **)&array, LTERM);
    /* nelmnts is a size_t, hence %zu */
    gk_errexit(SIGERR, "Failed to read the number of words requested. %zu\n", nelmnts);
    return NULL;
  }
  gk_fclose(fpin);

  if (r_nelmnts != NULL)
    *r_nelmnts = nelmnts;

  return array;
}
/*************************************************************************/
/*! Writes an array of int64_t into a binary file, replacing any previous
    contents of that file.

    \param fname is the name of the file
    \param n is the number of elements in the array
    \param a is the array to be written out
    \returns the value returned by fwrite(), i.e., the number of elements
             that were written.
*/
/*************************************************************************/
size_t gk_i64writefilebin(char *fname, size_t n, int64_t *a)
{
  FILE *fpout = gk_fopen(fname, "wb", "gk_writefilebin");
  size_t nwritten = fwrite(a, sizeof(int64_t), n, fpout);

  gk_fclose(fpout);

  return nwritten;
}
/*************************************************************************/
/*! This function reads the contents of a binary file and returns it in the
    form of an array of ssize_t.

    \param fname is the name of the file
    \param r_nelmnts is set to the number of ssize_t elements that were
           read; it is set to 0 on failure. If it is NULL, this information
           is not returned.
    \returns the allocated array. NULL is returned on failure, should
             gk_errexit() return to the caller.
*/
/*************************************************************************/
ssize_t *gk_zreadfilebin(char *fname, size_t *r_nelmnts)
{
  size_t nelmnts;
  ssize_t fsize;
  ssize_t *array=NULL;
  FILE *fpin;

  if (r_nelmnts != NULL)
    *r_nelmnts = 0;

  fsize = gk_getfsize(fname);
  if (fsize == -1) {
    gk_errexit(SIGERR, "Failed to fstat(%s).\n", fname);
    return NULL;
  }

  /* The file must hold a whole number of elements. */
  if (fsize%sizeof(ssize_t) != 0) {
    gk_errexit(SIGERR, "The size of the file is not in multiples of sizeof(ssize_t).\n");
    return NULL;
  }

  nelmnts = fsize/sizeof(ssize_t);
  array = gk_zmalloc(nelmnts, "gk_zreadfilebin: array");

  fpin = gk_fopen(fname, "rb", "gk_zreadfilebin");

  if (fread(array, sizeof(ssize_t), nelmnts, fpin) != nelmnts) {
    /* Release the stream and the buffer before reporting, so that nothing
       is leaked regardless of how gk_errexit() leaves this function. */
    gk_fclose(fpin);
    gk_free((void **)&array, LTERM);
    /* nelmnts is a size_t, hence %zu */
    gk_errexit(SIGERR, "Failed to read the number of words requested. %zu\n", nelmnts);
    return NULL;
  }
  gk_fclose(fpin);

  if (r_nelmnts != NULL)
    *r_nelmnts = nelmnts;

  return array;
}
/*************************************************************************/
/*! Writes an array of ssize_t into a binary file, replacing any previous
    contents of that file.

    \param fname is the name of the file
    \param n is the number of elements in the array
    \param a is the array to be written out
    \returns the value returned by fwrite(), i.e., the number of elements
             that were written.
*/
/*************************************************************************/
size_t gk_zwritefilebin(char *fname, size_t n, ssize_t *a)
{
  FILE *fpout = gk_fopen(fname, "wb", "gk_writefilebin");
  size_t nwritten = fwrite(a, sizeof(ssize_t), n, fpout);

  gk_fclose(fpout);

  return nwritten;
}
/*************************************************************************/
/*! This function reads the contents of a binary file and returns it in the
    form of an array of float.

    \param fname is the name of the file
    \param r_nelmnts is set to the number of float elements that were
           read; it is set to 0 on failure. If it is NULL, this information
           is not returned.
    \returns the allocated array. NULL is returned on failure, should
             gk_errexit() return to the caller.
*/
/*************************************************************************/
float *gk_freadfilebin(char *fname, size_t *r_nelmnts)
{
  size_t nelmnts;
  ssize_t fsize;
  float *array=NULL;
  FILE *fpin;

  if (r_nelmnts != NULL)
    *r_nelmnts = 0;

  fsize = gk_getfsize(fname);
  if (fsize == -1) {
    gk_errexit(SIGERR, "Failed to fstat(%s).\n", fname);
    return NULL;
  }

  /* The file must hold a whole number of elements. */
  if (fsize%sizeof(float) != 0) {
    gk_errexit(SIGERR, "The size of the file is not in multiples of sizeof(float).\n");
    return NULL;
  }

  nelmnts = fsize/sizeof(float);
  array = gk_fmalloc(nelmnts, "gk_freadfilebin: array");

  fpin = gk_fopen(fname, "rb", "gk_freadfilebin");

  if (fread(array, sizeof(float), nelmnts, fpin) != nelmnts) {
    /* Release the stream and the buffer before reporting, so that nothing
       is leaked regardless of how gk_errexit() leaves this function. */
    gk_fclose(fpin);
    gk_free((void **)&array, LTERM);
    /* nelmnts is a size_t, hence %zu */
    gk_errexit(SIGERR, "Failed to read the number of words requested. %zu\n", nelmnts);
    return NULL;
  }
  gk_fclose(fpin);

  if (r_nelmnts != NULL)
    *r_nelmnts = nelmnts;

  return array;
}
/*************************************************************************/
/*! Writes an array of float into a binary file, replacing any previous
    contents of that file.

    \param fname is the name of the file
    \param n is the number of elements in the array
    \param a is the array to be written out
    \returns the value returned by fwrite(), i.e., the number of elements
             that were written.
*/
/*************************************************************************/
size_t gk_fwritefilebin(char *fname, size_t n, float *a)
{
  FILE *fpout = gk_fopen(fname, "wb", "gk_fwritefilebin");
  size_t nwritten = fwrite(a, sizeof(float), n, fpout);

  gk_fclose(fpout);

  return nwritten;
}
/*************************************************************************/
/*! This function reads the contents of a binary file and returns it in the
    form of an array of double.

    \param fname is the name of the file
    \param r_nelmnts is set to the number of double elements that were
           read; it is set to 0 on failure. If it is NULL, this information
           is not returned.
    \returns the allocated array. NULL is returned on failure, should
             gk_errexit() return to the caller.
*/
/*************************************************************************/
double *gk_dreadfilebin(char *fname, size_t *r_nelmnts)
{
  size_t nelmnts;
  ssize_t fsize;
  double *array=NULL;
  FILE *fpin;

  if (r_nelmnts != NULL)
    *r_nelmnts = 0;

  fsize = gk_getfsize(fname);
  if (fsize == -1) {
    gk_errexit(SIGERR, "Failed to fstat(%s).\n", fname);
    return NULL;
  }

  /* The file must hold a whole number of elements. */
  if (fsize%sizeof(double) != 0) {
    gk_errexit(SIGERR, "The size of the file is not in multiples of sizeof(double).\n");
    return NULL;
  }

  nelmnts = fsize/sizeof(double);
  array = gk_dmalloc(nelmnts, "gk_dreadfilebin: array");

  fpin = gk_fopen(fname, "rb", "gk_dreadfilebin");

  if (fread(array, sizeof(double), nelmnts, fpin) != nelmnts) {
    /* Release the stream and the buffer before reporting, so that nothing
       is leaked regardless of how gk_errexit() leaves this function. */
    gk_fclose(fpin);
    gk_free((void **)&array, LTERM);
    /* nelmnts is a size_t, hence %zu */
    gk_errexit(SIGERR, "Failed to read the number of words requested. %zu\n", nelmnts);
    return NULL;
  }
  gk_fclose(fpin);

  if (r_nelmnts != NULL)
    *r_nelmnts = nelmnts;

  return array;
}
/*************************************************************************/
/*! Writes an array of double into a binary file, replacing any previous
    contents of that file.

    \param fname is the name of the file
    \param n is the number of elements in the array
    \param a is the array to be written out
    \returns the value returned by fwrite(), i.e., the number of elements
             that were written.
*/
/*************************************************************************/
size_t gk_dwritefilebin(char *fname, size_t n, double *a)
{
  FILE *fpout = gk_fopen(fname, "wb", "gk_writefilebin");
  size_t nwritten = fwrite(a, sizeof(double), n, fpout);

  gk_fclose(fpout);

  return nwritten;
}

@ -0,0 +1,210 @@
/*!
* \file
* \brief Frequent/Closed itemset discovery routines
*
* This file contains the code for finding frequent/closed itemsets. These routines
* are implemented using a call-back mechanism to deal with the discovered itemsets.
*
* \date 6/13/2008
* \author George Karypis
* \version\verbatim $Id: itemsets.c 19240 2015-10-22 12:41:19Z karypis $ \endverbatim
*/
#include <GKlib.h>
/*-------------------------------------------------------------*/
/*! Data structures for use within this module */
/*-------------------------------------------------------------*/
/* Parameters and shared workspace that are threaded through the recursive
   itemset discovery routines of this module. */
typedef struct {
int minfreq; /* the minimum frequency of a pattern; columns with fewer marked rows are dropped during projection */
int maxfreq; /* the maximum frequency of a pattern; columns with more marked rows are dropped during projection */
int minlen; /* the minimum length of the requested pattern; shorter prefixes are not reported */
int maxlen; /* the maximum length of the requested pattern; the recursion stops at this length */
int tnitems; /* the initial range of the item space (set to the number of columns of the input matrix) */
/* the call-back function; invoked for every reported itemset with the
   user pointer, the itemset length and item ids, and the number and ids
   of the transactions that contain it */
void (*callback)(void *stateptr, int nitems, int *itemids, int ntrans, int *transids);
void *stateptr; /* the user-supplied pointer to pass to the callback */
/* workspace variables */
int *rmarker; /* one 0/1 flag per row of the initial matrix; projections set the flags of the kept rows and reset them before returning */
gk_ikv_t *cand; /* scratch (key=frequency, val=column) pairs, one slot per column of the initial matrix */
} isparams_t;
/*-------------------------------------------------------------*/
/*! Prototypes for this module */
/*-------------------------------------------------------------*/
/* Recursively extends the prefix[0..preflen) itemset with the columns of mat */
void itemsets_find_frequent_itemsets(isparams_t *params, gk_csr_t *mat,
int preflen, int *prefix);
/* Projects mat onto column cid (cid == -1 requests the initial projection) */
gk_csr_t *itemsets_project_matrix(isparams_t *param, gk_csr_t *mat, int cid);
/*************************************************************************/
/*! The entry point of the frequent itemset discovery code.

    The transactions are supplied in a compressed row format: the items of
    transaction i are stored in tranind[tranptr[i]..tranptr[i+1]), so
    tranptr[ntrans] is the total number of stored items.

    \param ntrans is the number of transactions
    \param tranptr is the array of ntrans+1 offsets into tranind
    \param tranind is the array of item ids of all the transactions
    \param minfreq is the minimum frequency of a reported itemset
    \param maxfreq is the maximum frequency of a reported itemset; -1 stands
           for the number of transactions
    \param minlen is the minimum length of a reported itemset
    \param maxlen is the maximum length of a reported itemset; -1 stands
           for the number of distinct item ids (largest id + 1)
    \param process_itemset is the call-back that receives each itemset: the
           user pointer, the itemset length and item ids, and the number
           and ids of the transactions that contain it
    \param stateptr is the user pointer that is handed to the call-back
*/
/*************************************************************************/
void gk_find_frequent_itemsets(int ntrans, ssize_t *tranptr, int *tranind,
         int minfreq, int maxfreq, int minlen, int maxlen,
         void (*process_itemset)(void *stateptr, int nitems, int *itemids,
                                 int ntrans, int *transids),
         void *stateptr)
{
  gk_csr_t *mat, *pmat;
  isparams_t params;
  int *pattern;

  /* Without any stored item there is no largest item id to look up (the
     lookup below would read tranind[0] of an empty array) and there is no
     itemset to report. */
  if (ntrans <= 0 || tranptr[ntrans] <= 0)
    return;

  /* Create the matrix */
  mat = gk_csr_Create();
  mat->nrows  = ntrans;
  mat->ncols  = tranind[gk_iargmax(tranptr[ntrans], tranind, 1)]+1;
  mat->rowptr = gk_zcopy(ntrans+1, tranptr, gk_zmalloc(ntrans+1, "gk_find_frequent_itemsets: mat.rowptr"));
  mat->rowind = gk_icopy(tranptr[ntrans], tranind, gk_imalloc(tranptr[ntrans], "gk_find_frequent_itemsets: mat.rowind"));
  mat->colids = gk_iincset(mat->ncols, 0, gk_imalloc(mat->ncols, "gk_find_frequent_itemsets: mat.colids"));

  /* Setup the parameters */
  params.minfreq  = minfreq;
  params.maxfreq  = (maxfreq == -1 ? mat->nrows : maxfreq);
  params.minlen   = minlen;
  params.maxlen   = (maxlen == -1 ? mat->ncols : maxlen);
  params.tnitems  = mat->ncols;
  params.callback = process_itemset;
  params.stateptr = stateptr;
  params.rmarker  = gk_ismalloc(mat->nrows, 0, "gk_find_frequent_itemsets: rmarker");
  params.cand     = gk_ikvmalloc(mat->ncols, "gk_find_frequent_itemsets: cand");

  /* Perform the initial projection; it only needs the column index */
  gk_csr_CreateIndex(mat, GK_CSR_COL);
  pmat = itemsets_project_matrix(&params, mat, -1);
  gk_csr_Free(&mat);

  /* pattern holds the current prefix during the recursion */
  pattern = gk_imalloc(pmat->ncols, "gk_find_frequent_itemsets: pattern");
  itemsets_find_frequent_itemsets(&params, pmat, 0, pattern);

  gk_csr_Free(&pmat);
  gk_free((void **)&pattern, &params.rmarker, &params.cand, LTERM);
}
/*************************************************************************/
/*! The recursive routine for DFS-based frequent pattern discovery.

    Every column of mat extends the current prefix by one item. The
    extended prefix is reported via the call-back once it is at least
    minlen long, and it is extended further (on the matrix projected onto
    that column) as long as it is shorter than maxlen.

    \param params holds the search limits, the call-back and the workspace
    \param mat is the (projected) matrix whose columns are the candidates
    \param preflen is the number of items already stored in prefix
    \param prefix is the array that holds the current itemset
*/
/*************************************************************************/
void itemsets_find_frequent_itemsets(isparams_t *params, gk_csr_t *mat,
         int preflen, int *prefix)
{
  ssize_t col;
  int newlen = preflen+1;
  gk_csr_t *projected;

  for (col=0; col<mat->ncols; col++) {
    /* extend the prefix with this column's item */
    prefix[preflen] = mat->colids[col];

    if (newlen >= params->minlen)
      (*params->callback)(params->stateptr, newlen, prefix,
          mat->colptr[col+1]-mat->colptr[col], mat->colind+mat->colptr[col]);

    /* the pattern cannot grow any further */
    if (newlen >= params->maxlen)
      continue;

    projected = itemsets_project_matrix(params, mat, col);
    itemsets_find_frequent_itemsets(params, projected, newlen, prefix);
    gk_csr_Free(&projected);
  }
}
/******************************************************************************/
/*! Builds the projection of a column-indexed matrix w.r.t. one of its columns.

    The projection keeps the rows that appear in column cid (all the rows
    when cid is -1) and, among the columns that follow cid, those whose
    number of kept rows lies within [minfreq, maxfreq]. The steps are:
     - Mark the kept rows in the shared rmarker workspace.
     - Determine the length of each remaining column.
     - Sort the surviving columns in increasing length.
     - Create the column-based version of the matrix with that ordering.
     - Reset the marks, so that the workspace is clean for the next call.

    \param params supplies minfreq/maxfreq and the rmarker/cand workspace
    \param mat is the matrix to project; its column structure is read
    \param cid is the column to project on, or -1 for the initial projection
    \returns the newly created projected matrix
*/
/*******************************************************************************/
gk_csr_t *itemsets_project_matrix(isparams_t *params, gk_csr_t *mat, int cid)
{
  ssize_t c, p, q, support, nkept;
  int nrows, ncols, keptrows, ncand;
  ssize_t *colptr, *newptr;
  int *colind, *colids, *newind, *newids, *rmarker;
  gk_csr_t *pmat;
  gk_ikv_t *cand;

  rmarker = params->rmarker;
  cand    = params->cand;

  nrows  = mat->nrows;
  ncols  = mat->ncols;
  colptr = mat->colptr;
  colind = mat->colind;
  colids = mat->colids;

  /* Create the projected matrix and record what is known thus far */
  pmat = gk_csr_Create();
  keptrows = (cid == -1 ? nrows : colptr[cid+1]-colptr[cid]);
  pmat->nrows = keptrows;

  /* Mark the rows that are kept */
  if (cid != -1) {
    for (p=colptr[cid]; p<colptr[cid+1]; p++)
      rmarker[colind[p]] = 1;
  }
  else {
    gk_iset(nrows, 1, rmarker);
  }

  /* Collect the columns after cid whose number of kept rows is in range */
  ncand = 0;
  nkept = 0;
  for (c=cid+1; c<ncols; c++) {
    support = 0;
    for (p=colptr[c]; p<colptr[c+1]; p++)
      support += rmarker[colind[p]];

    if (support < params->minfreq || support > params->maxfreq)
      continue;

    cand[ncand].val = c;
    cand[ncand].key = support;
    ncand++;
    nkept += support;
  }

  /* Order the surviving columns by increasing length */
  gk_ikvsorti(ncand, cand);

  /* Allocate the remaining fields of the projected matrix */
  newids = gk_imalloc(ncand, "itemsets_project_matrix: pcolids");
  newptr = gk_zmalloc(ncand+1, "itemsets_project_matrix: pcolptr");
  newind = gk_imalloc(nkept, "itemsets_project_matrix: pcolind");

  pmat->ncols  = ncand;
  pmat->colids = newids;
  pmat->colptr = newptr;
  pmat->colind = newind;

  /* Copy the kept rows of each surviving column, in sorted column order */
  newptr[0] = 0;
  nkept = 0;
  for (q=0; q<ncand; q++) {
    c = cand[q].val;
    for (p=colptr[c]; p<colptr[c+1]; p++) {
      if (rmarker[colind[p]])
        newind[nkept++] = colind[p];
    }
    newids[q]   = colids[c];
    newptr[q+1] = nkept;
  }

  /* Reset the rmarker array */
  if (cid != -1) {
    for (p=colptr[cid]; p<colptr[cid+1]; p++)
      rmarker[colind[p]] = 0;
  }
  else {
    gk_iset(nrows, 0, rmarker);
  }

  return pmat;
}

@ -0,0 +1,393 @@
/*!
\file
\brief Functions dealing with creating and allocating mcores
\date Started 5/30/11
\author George
\author Copyright 1997-2011, Regents of the University of Minnesota
\version $Id: mcore.c 13953 2013-03-30 16:20:07Z karypis $
*/
#include <GKlib.h>
/*************************************************************************/
/*! This function creates an mcore
*/
/*************************************************************************/
gk_mcore_t *gk_mcoreCreate(size_t coresize)
{
gk_mcore_t *mcore;
mcore = (gk_mcore_t *)gk_malloc(sizeof(gk_mcore_t), "gk_mcoreCreate: mcore");
memset(mcore, 0, sizeof(gk_mcore_t));
mcore->coresize = coresize;
mcore->corecpos = 0;
mcore->core = (coresize == 0 ? NULL : gk_malloc(mcore->coresize, "gk_mcoreCreate: core"));
/* allocate the memory for keeping track of malloc ops */
mcore->nmops = 2048;
mcore->cmop = 0;
mcore->mops = (gk_mop_t *)gk_malloc(mcore->nmops*sizeof(gk_mop_t), "gk_mcoreCreate: mcore->mops");
return mcore;
}
/*************************************************************************/
/*! Creates an mcore without a core. This version is used for gkmcore and
    allocates directly from the C library instead of going through
    gk_malloc().

    \returns the new mcore with room for 2048 malloc ops, or NULL if
             either allocation failed.
*/
/*************************************************************************/
gk_mcore_t *gk_gkmcoreCreate()
{
  gk_mcore_t *mc;

  /* calloc hands back the structure already zeroed */
  mc = (gk_mcore_t *)calloc(1, sizeof(gk_mcore_t));
  if (mc == NULL)
    return NULL;

  /* the stack that keeps track of the malloc ops */
  mc->nmops = 2048;
  mc->cmop  = 0;
  mc->mops  = (gk_mop_t *)malloc(mc->nmops*sizeof(gk_mop_t));
  if (mc->mops != NULL)
    return mc;

  free(mc);
  return NULL;
}
/*************************************************************************/
/*! Destroys an mcore and sets the caller's pointer to NULL.

    A warning is printed when the mcore still has outstanding core/heap
    bytes or malloc ops at the time it is destroyed.

    \param r_mcore points to the mcore pointer; nothing happens if the
           mcore pointer is NULL.
    \param showstats, if non-zero, prints the usage statistics first.
*/
/*************************************************************************/
void gk_mcoreDestroy(gk_mcore_t **r_mcore, int showstats)
{
  gk_mcore_t *mc = *r_mcore;
  int leaked;

  if (mc == NULL)
    return;

  if (showstats) {
    printf("\n gk_mcore statistics\n"
           " coresize: %12zu nmops: %12zu cmop: %6zu\n"
           " num_callocs: %12zu num_hallocs: %12zu\n"
           " size_callocs: %12zu size_hallocs: %12zu\n"
           " cur_callocs: %12zu cur_hallocs: %12zu\n"
           " max_callocs: %12zu max_hallocs: %12zu\n",
           mc->coresize, mc->nmops, mc->cmop,
           mc->num_callocs, mc->num_hallocs,
           mc->size_callocs, mc->size_hallocs,
           mc->cur_callocs, mc->cur_hallocs,
           mc->max_callocs, mc->max_hallocs);
  }

  /* anything still accounted for was never popped */
  leaked = (mc->cur_callocs != 0 || mc->cur_hallocs != 0 || mc->cmop != 0);
  if (leaked)
    printf("***Warning: mcore memory was not fully freed when destroyed.\n"
           " cur_callocs: %6zu cur_hallocs: %6zu cmop: %6zu\n",
           mc->cur_callocs, mc->cur_hallocs, mc->cmop);

  gk_free((void **)&mc->core, &mc->mops, &mc, LTERM);

  *r_mcore = NULL;
}
/*************************************************************************/
/*! Destroys an mcore and sets the caller's pointer to NULL. This version
    is for gkmcore and releases the memory with free().

    A warning is printed when the mcore still has outstanding heap bytes
    or malloc ops at the time it is destroyed.

    \param r_mcore points to the mcore pointer; nothing happens if the
           mcore pointer is NULL.
    \param showstats, if non-zero, prints the usage statistics first.
*/
/*************************************************************************/
void gk_gkmcoreDestroy(gk_mcore_t **r_mcore, int showstats)
{
  gk_mcore_t *mc = *r_mcore;
  int leaked;

  if (mc == NULL)
    return;

  if (showstats) {
    printf("\n gk_mcore statistics\n"
           " nmops: %12zu cmop: %6zu\n"
           " num_hallocs: %12zu\n"
           " size_hallocs: %12zu\n"
           " cur_hallocs: %12zu\n"
           " max_hallocs: %12zu\n",
           mc->nmops, mc->cmop,
           mc->num_hallocs,
           mc->size_hallocs,
           mc->cur_hallocs,
           mc->max_hallocs);
  }

  /* anything still accounted for was never popped */
  leaked = (mc->cur_hallocs != 0 || mc->cmop != 0);
  if (leaked)
    printf("***Warning: mcore memory was not fully freed when destroyed.\n"
           " cur_hallocs: %6zu cmop: %6zu\n",
           mc->cur_hallocs, mc->cmop);

  free(mc->mops);
  free(mc);

  *r_mcore = NULL;
}
/*************************************************************************/
/*! Allocates space from the core or, when the core cannot hold the
    request, from the heap. Either way the allocation is recorded in the
    mcore's stack of malloc ops.

    \param mcore is the mcore to allocate from
    \param nbytes is the requested size; it is rounded up to a multiple of 8
    \returns a pointer to the allocated space
*/
/*************************************************************************/
void *gk_mcoreMalloc(gk_mcore_t *mcore, size_t nbytes)
{
  void *mem;
  size_t rem = nbytes%8;

  /* pad to make pointers 8-byte aligned */
  if (rem != 0)
    nbytes += 8 - rem;

  if (mcore->corecpos + nbytes >= mcore->coresize) {
    /* the request does not fit in the core; service it from the heap */
    mem = gk_malloc(nbytes, "gk_mcoremalloc: ptr");
    gk_mcoreAdd(mcore, GK_MOPT_HEAP, nbytes, mem);
    return mem;
  }

  /* service this request from the core */
  mem = ((char *)mcore->core)+mcore->corecpos;
  mcore->corecpos += nbytes;
  gk_mcoreAdd(mcore, GK_MOPT_CORE, nbytes, mem);

  return mem;
}
/*************************************************************************/
/*! This function sets a marker in the stack of malloc ops to be used
subsequently for freeing purposes: gk_mcorePop() releases the ops that
were added after the most recent marker and then removes that marker.
\param mcore is the mcore whose stack of malloc ops receives the marker.
*/
/*************************************************************************/
void gk_mcorePush(gk_mcore_t *mcore)
{
/* a marker is a mop of type GK_MOPT_MARK with no bytes and no pointer */
gk_mcoreAdd(mcore, GK_MOPT_MARK, 0, NULL);
/* printf("MCPPUSH: %zu\n", mcore->cmop-1); */
}
/*************************************************************************/
/*! This function sets a marker in the stack of malloc ops to be used
subsequently for freeing purposes: gk_gkmcorePop() releases the ops that
were added after the most recent marker and then removes that marker.
This is the gkmcore version.
\param mcore is the mcore whose stack of malloc ops receives the marker.
*/
/*************************************************************************/
void gk_gkmcorePush(gk_mcore_t *mcore)
{
/* a marker is a mop of type GK_MOPT_MARK with no bytes and no pointer */
gk_gkmcoreAdd(mcore, GK_MOPT_MARK, 0, NULL);
/* printf("MCPPUSH: %zu\n", mcore->cmop-1); */
}
/*************************************************************************/
/*! Frees all the mops since the last push.

    The stack of malloc ops is unwound from its top: core allocations give
    their bytes back to the core, heap allocations are released with
    gk_free(), and the unwinding stops right after the most recent marker
    has been removed (or when the stack becomes empty).

    \param mcore is the mcore whose ops are unwound
*/
/*************************************************************************/
void gk_mcorePop(gk_mcore_t *mcore)
{
  gk_mop_t *mop;

  while (mcore->cmop > 0) {
    mop = &mcore->mops[--mcore->cmop];

    /* the push marker ends the unwinding */
    if (mop->type == GK_MOPT_MARK)
      return;

    if (mop->type == GK_MOPT_CORE) {
      /* core free */
      if (mcore->corecpos < mop->nbytes)
        errexit("Internal Error: wspace's core is about to be over-freed [%zu, %zu, %zd]\n",
            mcore->coresize, mcore->corecpos, mop->nbytes);

      mcore->corecpos    -= mop->nbytes;
      mcore->cur_callocs -= mop->nbytes;
    }
    else if (mop->type == GK_MOPT_HEAP) {
      /* heap free */
      gk_free((void **)&mop->ptr, LTERM);
      mcore->cur_hallocs -= mop->nbytes;
    }
    else {
      gk_errexit(SIGMEM, "Unknown mop type of %d\n", mop->type);
    }
  }
}
/*************************************************************************/
/*! Frees all the mops since the last push. This version is for popping
    the gkmcore and it uses free() instead of gk_free().

    The unwinding stops right after the most recent marker has been
    removed (or when the stack becomes empty).

    \param mcore is the mcore whose ops are unwound
*/
/*************************************************************************/
void gk_gkmcorePop(gk_mcore_t *mcore)
{
  gk_mop_t *mop;

  while (mcore->cmop > 0) {
    mop = &mcore->mops[--mcore->cmop];

    /* the push marker ends the unwinding */
    if (mop->type == GK_MOPT_MARK)
      return;

    if (mop->type == GK_MOPT_HEAP) {
      /* heap free */
      free(mop->ptr);
      mcore->cur_hallocs -= mop->nbytes;
    }
    else {
      gk_errexit(SIGMEM, "Unknown mop type of %d\n", mop->type);
    }
  }
}
/*************************************************************************/
/*! Adds a memory allocation at the end of the list and updates the
    statistics of the mcore.

    \param mcore is the mcore that records the operation
    \param type is the type of the operation: GK_MOPT_MARK, GK_MOPT_CORE,
           or GK_MOPT_HEAP
    \param nbytes is the size of the allocation (0 for markers)
    \param ptr is the pointer of the allocation (NULL for markers)
*/
/*************************************************************************/
void gk_mcoreAdd(gk_mcore_t *mcore, int type, size_t nbytes, void *ptr)
{
  if (mcore->cmop == mcore->nmops) {
    mcore->nmops *= 2;
    /* The mops array of an mcore is obtained with gk_malloc() and released
       with gk_free(), so it has to be resized with gk_realloc(): a raw
       realloc() that moves the array leaves the heap tracking with the old
       address, and the later gk_free() of the new address is then reported
       as an unknown pointer whenever tracking is active. */
    mcore->mops = (gk_mop_t *)gk_realloc(mcore->mops, mcore->nmops*sizeof(gk_mop_t), "gk_mcoreAdd: mcore->mops");
    if (mcore->mops == NULL)
      gk_errexit(SIGMEM, "***Memory allocation for gkmcore failed.\n");
  }

  mcore->mops[mcore->cmop].type   = type;
  mcore->mops[mcore->cmop].nbytes = nbytes;
  mcore->mops[mcore->cmop].ptr    = ptr;
  mcore->cmop++;

  switch (type) {
    case GK_MOPT_MARK:
      /* markers do not contribute to the statistics */
      break;

    case GK_MOPT_CORE:
      mcore->num_callocs++;
      mcore->size_callocs += nbytes;
      mcore->cur_callocs  += nbytes;
      if (mcore->max_callocs < mcore->cur_callocs)
        mcore->max_callocs = mcore->cur_callocs;
      break;

    case GK_MOPT_HEAP:
      mcore->num_hallocs++;
      mcore->size_hallocs += nbytes;
      mcore->cur_hallocs  += nbytes;
      if (mcore->max_hallocs < mcore->cur_hallocs)
        mcore->max_hallocs = mcore->cur_hallocs;
      break;

    default:
      gk_errexit(SIGMEM, "Incorrect mcore type operation.\n");
  }
}
/*************************************************************************/
/*! Adds a memory allocation at the end of the list and updates the heap
    statistics. This is the gkmcore version: it only accepts markers and
    heap allocations, and it grows the list with realloc().

    \param mcore is the mcore that records the operation
    \param type is the type of the operation: GK_MOPT_MARK or GK_MOPT_HEAP
    \param nbytes is the size of the allocation (0 for markers)
    \param ptr is the pointer of the allocation (NULL for markers)
*/
/*************************************************************************/
void gk_gkmcoreAdd(gk_mcore_t *mcore, int type, size_t nbytes, void *ptr)
{
  gk_mop_t *slot;

  /* double the list when it is full */
  if (mcore->nmops == mcore->cmop) {
    mcore->nmops *= 2;
    mcore->mops = realloc(mcore->mops, mcore->nmops*sizeof(gk_mop_t));
    if (mcore->mops == NULL)
      gk_errexit(SIGMEM, "***Memory allocation for gkmcore failed.\n");
  }

  slot = &mcore->mops[mcore->cmop++];
  slot->type   = type;
  slot->nbytes = nbytes;
  slot->ptr    = ptr;

  /* markers do not contribute to the statistics */
  if (type == GK_MOPT_MARK)
    return;

  if (type != GK_MOPT_HEAP) {
    gk_errexit(SIGMEM, "Incorrect mcore type operation.\n");
    return;
  }

  mcore->num_hallocs++;
  mcore->size_hallocs += nbytes;
  mcore->cur_hallocs  += nbytes;
  if (mcore->max_hallocs < mcore->cur_hallocs)
    mcore->max_hallocs = mcore->cur_hallocs;
}
/*************************************************************************/
/*! This function deletes the mop associated with the supplied pointer.
    The mop has to be a heap allocation made after the most recent marker,
    otherwise it fails violently. The memory itself is not released here;
    only the bookkeeping is updated.

    \param mcore is the mcore that recorded the allocation
    \param ptr is the pointer whose mop is to be deleted
*/
/*************************************************************************/
void gk_mcoreDel(gk_mcore_t *mcore, void *ptr)
{
  size_t i;

  /* Scan from the most recent mop backwards. The index has the width of
     the op counter (which this file prints with %zu), so it is not
     narrowed to an int. */
  for (i=mcore->cmop; i-- > 0; ) {
    if (mcore->mops[i].type == GK_MOPT_MARK)
      gk_errexit(SIGMEM, "Could not find pointer %p in mcore\n", ptr);

    if (mcore->mops[i].ptr == ptr) {
      if (mcore->mops[i].type != GK_MOPT_HEAP)
        gk_errexit(SIGMEM, "Trying to delete a non-HEAP mop.\n");

      mcore->cur_hallocs -= mcore->mops[i].nbytes;
      /* fill the hole with the last mop of the list */
      mcore->mops[i] = mcore->mops[--mcore->cmop];
      return;
    }
  }

  gk_errexit(SIGMEM, "mcoreDel should never have been here!\n");
}
/*************************************************************************/
/*! This function deletes the mop associated with the supplied pointer.
    The mop has to be a heap allocation made after the most recent marker,
    otherwise it fails violently. The memory itself is not released here;
    only the bookkeeping is updated. This is the gkmcore version.

    \param mcore is the mcore that recorded the allocation
    \param ptr is the pointer whose mop is to be deleted
*/
/*************************************************************************/
void gk_gkmcoreDel(gk_mcore_t *mcore, void *ptr)
{
  size_t i;

  /* Scan from the most recent mop backwards. The index has the width of
     the op counter (which this file prints with %zu), so it is not
     narrowed to an int. */
  for (i=mcore->cmop; i-- > 0; ) {
    if (mcore->mops[i].type == GK_MOPT_MARK)
      gk_errexit(SIGMEM, "Could not find pointer %p in mcore\n", ptr);

    if (mcore->mops[i].ptr == ptr) {
      if (mcore->mops[i].type != GK_MOPT_HEAP)
        gk_errexit(SIGMEM, "Trying to delete a non-HEAP mop.\n");

      mcore->cur_hallocs -= mcore->mops[i].nbytes;
      /* fill the hole with the last mop of the list */
      mcore->mops[i] = mcore->mops[--mcore->cmop];
      return;
    }
  }

  gk_errexit(SIGMEM, "gkmcoreDel should never have been here!\n");
}

@ -0,0 +1,307 @@
/*!
\file memory.c
\brief This file contains various allocation routines
The allocation routines included are for 1D and 2D arrays of most of the
datatypes that GKlib supports. Many of these routines are
defined with the help of the macros in gk_mkmemory.h. These macros
can be used to define other memory allocation routines.
\date Started 4/3/2007
\author George
\version\verbatim $Id: memory.c 21050 2017-05-25 03:53:58Z karypis $ \endverbatim
*/
#include <GKlib.h>
/* This is for the global mcore that tracks all heap allocations.
   It is declared __thread, so every thread has its own instance. It stays
   NULL (tracking disabled) until gk_malloc_init() creates it, and it is
   destroyed by gk_malloc_cleanup() once its list of ops becomes empty. */
static __thread gk_mcore_t *gkmcore = NULL;
/*************************************************************************/
/*! Define the set of memory allocation routines for each data type.
Each line below instantiates the routines for one (name prefix, element
type) pair.
NOTE(review): presumably this is where typed allocators such as
gk_i64malloc and gk_zmalloc come from; the macro is defined outside this
file, so confirm the exact set of generated routines in its header. */
/**************************************************************************/
/* character and integer types */
GK_MKALLOC(gk_c, char)
GK_MKALLOC(gk_i, int)
GK_MKALLOC(gk_i8, int8_t)
GK_MKALLOC(gk_i16, int16_t)
GK_MKALLOC(gk_i32, int32_t)
GK_MKALLOC(gk_i64, int64_t)
GK_MKALLOC(gk_ui8, uint8_t)
GK_MKALLOC(gk_ui16, uint16_t)
GK_MKALLOC(gk_ui32, uint32_t)
GK_MKALLOC(gk_ui64, uint64_t)
GK_MKALLOC(gk_z, ssize_t)
GK_MKALLOC(gk_zu, size_t)
/* floating point types */
GK_MKALLOC(gk_f, float)
GK_MKALLOC(gk_d, double)
/* the library's index type */
GK_MKALLOC(gk_idx, gk_idx_t)
/* the library's *kv_t structure types */
GK_MKALLOC(gk_ckv, gk_ckv_t)
GK_MKALLOC(gk_ikv, gk_ikv_t)
GK_MKALLOC(gk_i8kv, gk_i8kv_t)
GK_MKALLOC(gk_i16kv, gk_i16kv_t)
GK_MKALLOC(gk_i32kv, gk_i32kv_t)
GK_MKALLOC(gk_i64kv, gk_i64kv_t)
GK_MKALLOC(gk_zkv, gk_zkv_t)
GK_MKALLOC(gk_zukv, gk_zukv_t)
GK_MKALLOC(gk_fkv, gk_fkv_t)
GK_MKALLOC(gk_dkv, gk_dkv_t)
GK_MKALLOC(gk_skv, gk_skv_t)
GK_MKALLOC(gk_idxkv, gk_idxkv_t)
/*************************************************************************/
/*! This function allocates a two-dimensional matrix as an array of ndim1
    row pointers, each of which points to a separately allocated row of
    ndim2 elements.

    \param r_matrix receives the allocated matrix; it is set to NULL if any
           of the allocations failed.
    \param elmlen is the size in bytes of each element
    \param ndim1 is the number of rows
    \param ndim2 is the number of elements of each row
*/
/*************************************************************************/
void gk_AllocMatrix(void ***r_matrix, size_t elmlen, size_t ndim1, size_t ndim2)
{
  size_t i, j;
  void **matrix;

  *r_matrix = NULL;

  if ((matrix = (void **)gk_malloc(ndim1*sizeof(void *), "gk_AllocMatrix: matrix")) == NULL)
    return;

  for (i=0; i<ndim1; i++) {
    if ((matrix[i] = (void *)gk_malloc(ndim2*elmlen, "gk_AllocMatrix: matrix[i]")) == NULL) {
      /* undo the rows allocated so far ... */
      for (j=0; j<i; j++)
        gk_free((void **)&matrix[j], LTERM);
      /* ... and the array of row pointers itself, which is otherwise
         unreachable once this function returns */
      gk_free((void **)&matrix, LTERM);
      return;
    }
  }

  *r_matrix = matrix;
}
/*************************************************************************/
/*! Frees a two-dimensional matrix: first each of its rows and then the
    array of row pointers. The caller's pointer ends up being NULL.

    \param r_matrix points to the matrix; nothing happens if the matrix is
           NULL.
    \param ndim1 is the number of rows of the matrix
    \param ndim2 is the number of elements of each row (not used)
*/
/*************************************************************************/
void gk_FreeMatrix(void ***r_matrix, size_t ndim1, size_t ndim2)
{
  size_t row;
  void **rows = *r_matrix;

  if (rows == NULL)
    return;

  for (row=0; row<ndim1; row++)
    gk_free((void **)&rows[row], LTERM);

  gk_free((void **)r_matrix, LTERM);
}
/*************************************************************************/
/*! Initializes the tracking of heap allocations. The global mcore is
    created on first use and a marker is pushed on it, so that a matching
    gk_malloc_cleanup() releases what was allocated in between.

    \returns 1 on success, and 0 if the global mcore could not be created.
*/
/*************************************************************************/
int gk_malloc_init()
{
  /* create the tracker on first use */
  if (gkmcore == NULL && (gkmcore = gk_gkmcoreCreate()) == NULL)
    return 0;

  gk_gkmcorePush(gkmcore);

  return 1;
}
/*************************************************************************/
/*! Frees the memory that has been allocated since the last call to
    gk_malloc_init(). The global mcore itself is destroyed once its list
    of ops becomes empty.

    \param showstats, if non-zero, prints the usage statistics when the
           global mcore is destroyed.
*/
/*************************************************************************/
void gk_malloc_cleanup(int showstats)
{
  /* nothing to do when tracking is not active */
  if (gkmcore == NULL)
    return;

  gk_gkmcorePop(gkmcore);

  if (gkmcore->cmop != 0)
    return;

  /* that was the outermost marker */
  gk_gkmcoreDestroy(&gkmcore, showstats);
  gkmcore = NULL;
}
/*************************************************************************/
/*! This function is my wrapper around malloc that provides the following
    enhancements over malloc:
    * It always allocates at least one byte of memory, even if 0 bytes are
      requested. This is to ensure that checks of returned values do not
      lead to NULL due to 0 bytes requested.
    * It records the allocation in the global mcore when the tracking of
      heap allocations is active.
    * It reports a failed allocation via gk_errexit(), after printing the
      current and maximum memory usage.

    The returned memory is not initialized.

    \param nbytes is the number of bytes to allocate
    \param msg is the text that identifies the request in the error message
    \returns the allocated memory. NULL is returned on failure, should
             gk_errexit() return to the caller.
*/
/**************************************************************************/
void *gk_malloc(size_t nbytes, char *msg)
{
  void *mem;
  size_t request = (nbytes == 0 ? 1 : nbytes);  /* force an actual allocation */

  mem = (void *)malloc(request);

  if (mem != NULL) {
    /* add this memory allocation */
    if (gkmcore != NULL)
      gk_gkmcoreAdd(gkmcore, GK_MOPT_HEAP, request, mem);

    return mem;
  }

  fprintf(stderr, " Current memory used: %10zu bytes\n", gk_GetCurMemoryUsed());
  fprintf(stderr, " Maximum memory used: %10zu bytes\n", gk_GetMaxMemoryUsed());

  gk_errexit(SIGMEM, "***Memory allocation failed for %s. Requested size: %zu bytes",
      msg, request);

  return NULL;
}
/*************************************************************************/
/*! This function is my wrapper around realloc. It keeps the global mcore
    in sync when the tracking of heap allocations is active: the old
    pointer is removed from it before the call to realloc and the new one
    is added afterwards.

    \param oldptr is the memory to resize; it can be NULL
    \param nbytes is the new size; at least one byte is always requested
    \param msg is the text that identifies the request in the error message
    \returns the resized memory. NULL is returned on failure, should
             gk_errexit() return to the caller.
*/
/**************************************************************************/
void *gk_realloc(void *oldptr, size_t nbytes, char *msg)
{
  void *newptr;

  /* Force reallocs to actually allocate some memory */
  if (nbytes == 0)
    nbytes = 1;

  /* remove this memory de-allocation */
  if (oldptr != NULL && gkmcore != NULL)
    gk_gkmcoreDel(gkmcore, oldptr);

  newptr = (void *)realloc(oldptr, nbytes);

  if (newptr != NULL) {
    /* add this memory allocation */
    if (gkmcore != NULL)
      gk_gkmcoreAdd(gkmcore, GK_MOPT_HEAP, nbytes, newptr);

    return newptr;
  }

  fprintf(stderr, " Maximum memory used: %10zu bytes\n", gk_GetMaxMemoryUsed());
  fprintf(stderr, " Current memory used: %10zu bytes\n", gk_GetCurMemoryUsed());

  gk_errexit(SIGMEM, "***Memory realloc failed for %s. " "Requested size: %zu bytes",
      msg, nbytes);

  return NULL;
}
/*************************************************************************/
/*! Wrapper around free() that releases any number of pointers.

    The arguments are the ADDRESSES of the pointers to release, and the
    list must be terminated with LTERM, e.g.
        gk_free((void **)&a, &b, &c, LTERM);
    Each non-NULL pointer is removed from gkmcore (when a tracking core is
    active), passed to free(), and then set to NULL. NULL pointers are
    skipped, and a call whose first argument is already LTERM does nothing.

    \param ptr1 is the address of the first pointer to free.
*/
/**************************************************************************/
void gk_free(void **ptr1,...)
{
  va_list plist;
  void **ptr;

  va_start(plist, ptr1);

  /* the first argument and the variadic ones are handled identically */
  for (ptr = ptr1; ptr != LTERM; ptr = va_arg(plist, void **)) {
    if (*ptr != NULL) {
      /* drop the tracking record while the pointer value is still valid;
         using a pointer value after free() is not well defined in C */
      if (gkmcore != NULL)
        gk_gkmcoreDel(gkmcore, *ptr);

      free(*ptr);
    }
    *ptr = NULL;
  }

  va_end(plist);
}
/*************************************************************************/
/*! Returns the current amount of dynamically allocated memory as
    recorded in gkmcore->cur_hallocs, or 0 when no tracking core exists.
*/
/**************************************************************************/
size_t gk_GetCurMemoryUsed()
{
  return (gkmcore != NULL ? gkmcore->cur_hallocs : 0);
}
/*************************************************************************/
/*! Returns the maximum amount of dynamically allocated memory as
    recorded in gkmcore->max_hallocs, or 0 when no tracking core exists.
*/
/**************************************************************************/
size_t gk_GetMaxMemoryUsed()
{
  return (gkmcore != NULL ? gkmcore->max_hallocs : 0);
}
/*************************************************************************/
/*! Reads the first two fields of /proc/<pid>/statm for the calling
    process.

    The values are stored exactly as they appear in the file; no page-size
    scaling is applied here.
    NOTE(review): proc(5) documents these fields as total program size and
    resident set size, both counted in pages -- callers that need bytes
    must multiply by sysconf(_SC_PAGESIZE).

    \param vmsize receives the first field of the statm file.
    \param vmrss receives the second field of the statm file.
*/
/*************************************************************************/
void gk_GetVMInfo(size_t *vmsize, size_t *vmrss)
{
  FILE *fp;
  char fname[1024];

  /* bounded formatting; pid_t is cast so that it matches %d */
  snprintf(fname, sizeof(fname), "/proc/%d/statm", (int)getpid());

  fp = gk_fopen(fname, "r", "proc/pid/statm");

  if (fscanf(fp, "%zu %zu", vmsize, vmrss) != 2)
    errexit("Failed to read two values from %s\n", fname);

  gk_fclose(fp);
}
/*************************************************************************/
/*! Returns the peak virtual memory of the calling process, taken from
    the VmPeak line of /proc/self/status.

    The number that follows the "VmPeak:" label is multiplied by 1024
    (the field is presumably reported in kB -- see proc(5)).

    \returns the scaled VmPeak value, or 0 if /proc/self/status does not
             exist or contains no VmPeak line.
*/
/*************************************************************************/
size_t gk_GetProcVmPeak()
{
  FILE *fp;
  char line[128];
  size_t vmpeak=0;

  if (!gk_fexists("/proc/self/status"))
    return vmpeak;

  fp = gk_fopen("/proc/self/status", "r", "proc/self/status");

  for (;;) {
    if (fgets(line, 128, fp) == NULL)
      break;                              /* end of file: keep vmpeak at 0 */

    if (strncmp(line, "VmPeak:", 7) != 0)
      continue;                           /* not the line we are after */

    vmpeak = atoll(line+8)*1024;
    break;
  }

  gk_fclose(fp);

  return vmpeak;
}

@ -0,0 +1,25 @@
/*!
\file pqueue.c
\brief This file implements various max-priority queues.
The priority queues are generated using the GK_MKPQUEUE macro.
\date Started 3/27/2007
\author George
\version\verbatim $Id: pqueue.c 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
*/
#include <GKlib.h>
/*************************************************************************/
/*! Create the various max priority queues.
    Each GK_MKPQUEUE line below instantiates the queue routines for one
    key type. All of them use gk_idx_t as the fifth argument and key_gt as
    the comparison.
    NOTE(review): judging by the names, the arguments are the function
    prefix, the queue type, the key-value node type, the key type, the
    value type, the node allocator, the largest key value, and the key
    comparison -- confirm against the GK_MKPQUEUE definition. */
/*************************************************************************/
/* key_gt(a, b) is true when key a is strictly greater than key b */
#define key_gt(a, b) ((a) > (b))
GK_MKPQUEUE(gk_ipq, gk_ipq_t, gk_ikv_t, int, gk_idx_t, gk_ikvmalloc, INT_MAX, key_gt)
GK_MKPQUEUE(gk_i32pq, gk_i32pq_t, gk_i32kv_t, int32_t, gk_idx_t, gk_i32kvmalloc, INT32_MAX, key_gt)
GK_MKPQUEUE(gk_i64pq, gk_i64pq_t, gk_i64kv_t, int64_t, gk_idx_t, gk_i64kvmalloc, INT64_MAX, key_gt)
GK_MKPQUEUE(gk_fpq, gk_fpq_t, gk_fkv_t, float, gk_idx_t, gk_fkvmalloc, FLT_MAX, key_gt)
GK_MKPQUEUE(gk_dpq, gk_dpq_t, gk_dkv_t, double, gk_idx_t, gk_dkvmalloc, DBL_MAX, key_gt)
GK_MKPQUEUE(gk_idxpq, gk_idxpq_t, gk_idxkv_t, gk_idx_t, gk_idx_t, gk_idxkvmalloc, GK_IDX_MAX, key_gt)
/* the comparison macro is only needed for the instantiations above */
#undef key_gt

@ -0,0 +1,136 @@
/*!
\file
\brief Various routines for providing portable 32 and 64 bit random number
generators.
\date Started 5/17/2007
\author George
\version\verbatim $Id: random.c 18796 2015-06-02 11:39:45Z karypis $ \endverbatim
*/
#include <GKlib.h>
/*************************************************************************/
/*! Create the various random number functions.
    Each GK_MKRANDOM line instantiates the routines for one element type
    (third argument) under the given name prefix (first argument); the
    second argument is size_t for every instantiation.
    NOTE(review): the generated routines are defined by the GK_MKRANDOM
    macro, which is not part of this file -- see its definition for the
    exact set of functions produced. */
/*************************************************************************/
GK_MKRANDOM(gk_c, size_t, char)
GK_MKRANDOM(gk_i, size_t, int)
GK_MKRANDOM(gk_i32, size_t, int32_t)
GK_MKRANDOM(gk_f, size_t, float)
GK_MKRANDOM(gk_d, size_t, double)
GK_MKRANDOM(gk_idx, size_t, gk_idx_t)
GK_MKRANDOM(gk_z, size_t, ssize_t)
GK_MKRANDOM(gk_zu, size_t, size_t)
/*************************************************************************/
/*! GKlib's built in random number generator for portability across
different architectures */
/*************************************************************************/
#ifdef USE_GKRAND
/*
A C-program for MT19937-64 (2004/9/29 version).
Coded by Takuji Nishimura and Makoto Matsumoto.
This is a 64-bit version of Mersenne Twister pseudorandom number
generator.
Before using, initialize the state by using init_genrand64(seed)
or init_by_array64(init_key, key_length).
Copyright (C) 2004, Makoto Matsumoto and Takuji Nishimura,
All rights reserved.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* NN is the number of 64-bit words in the state vector mt[] */
#define NN 312
/* MM is the index offset used when the state vector is regenerated */
#define MM 156
/* MATRIX_A is the non-zero entry of the mag01[] table in gk_randint64() */
#define MATRIX_A 0xB5026F5AA96619E9ULL
#define UM 0xFFFFFFFF80000000ULL /* Most significant 33 bits */
#define LM 0x7FFFFFFFULL /* Least significant 31 bits */
/* The array for the state vector */
static uint64_t mt[NN];
/* mti is the index of the next word of mt[] to hand out;
   mti==NN+1 means mt[NN] is not initialized */
static int mti=NN+1;
#endif /* USE_GKRAND */
/* Seeds the random number generator.
   Without USE_GKRAND the seed is truncated to unsigned int and passed to
   srand(); with USE_GKRAND it fills the state vector mt[] and leaves
   mti equal to NN. */
void gk_randinit(uint64_t seed)
{
#ifndef USE_GKRAND
  srand((unsigned int) seed);
#else
  int i;

  mt[0] = seed;
  for (i=1; i<NN; i++)
    mt[i] = (6364136223846793005ULL * (mt[i-1] ^ (mt[i-1] >> 62)) + i);

  /* all NN words are in place; the next draw regenerates the state */
  mti = i;
#endif
}
/* Generates a random 64-bit integer.
   With USE_GKRAND the next tempered word of the twister is returned with
   its top bit cleared (x & 0x7FFFFFFFFFFFFFFF), i.e. a value in
   [0, 2^63-1]. Without USE_GKRAND the result is built from two rand()
   calls, one of them shifted left by 32 bits; note that C does not
   specify which of the two rand() calls is evaluated first. */
uint64_t gk_randint64(void)
{
#ifdef USE_GKRAND
int i;
unsigned long long x;
/* mag01[b] is the value xor-ed in when the low bit of x equals b */
static uint64_t mag01[2]={0ULL, MATRIX_A};
if (mti >= NN) { /* generate NN words at one time */
/* if gk_randinit() has not been called, */
/* a default initial seed is used */
if (mti == NN+1)
gk_randinit(5489ULL);
for (i=0; i<NN-MM; i++) {
x = (mt[i]&UM)|(mt[i+1]&LM);
mt[i] = mt[i+MM] ^ (x>>1) ^ mag01[(int)(x&1ULL)];
}
for (; i<NN-1; i++) {
x = (mt[i]&UM)|(mt[i+1]&LM);
mt[i] = mt[i+(MM-NN)] ^ (x>>1) ^ mag01[(int)(x&1ULL)];
}
/* the last word wraps around to mt[0] */
x = (mt[NN-1]&UM)|(mt[0]&LM);
mt[NN-1] = mt[MM-1] ^ (x>>1) ^ mag01[(int)(x&1ULL)];
mti = 0;
}
/* take the next state word and temper it */
x = mt[mti++];
x ^= (x >> 29) & 0x5555555555555555ULL;
x ^= (x << 17) & 0x71D67FFFEDA60000ULL;
x ^= (x << 37) & 0xFFF7EEE000000000ULL;
x ^= (x >> 43);
/* clear the most significant bit of the result */
return x & 0x7FFFFFFFFFFFFFFF;
#else
return (uint64_t)(((uint64_t) rand()) << 32 | ((uint64_t) rand()));
#endif
}
/* Generates a random 32-bit integer.
   With USE_GKRAND the low 31 bits of gk_randint64() are returned;
   otherwise the result of rand() converted to uint32_t. */
uint32_t gk_randint32(void)
{
  uint32_t value;

#ifdef USE_GKRAND
  value = (uint32_t)(gk_randint64() & 0x7FFFFFFF);
#else
  value = (uint32_t)rand();
#endif

  return value;
}

103
rw.c

@ -0,0 +1,103 @@
/*!
* \file
*
* \brief Various routines that perform random-walk based operations
on graphs stored as gk_csr_t matrices.
*
* \author George Karypis
* \version\verbatim $Id: rw.c 11078 2011-11-12 00:20:44Z karypis $ \endverbatim
*/
#include <GKlib.h>
/*************************************************************************/
/*! Computes the (personalized) page-rank of the vertices in a graph.

    \param mat is the matrix storing the graph.
    \param lamda is the weight given to the random-walk term; the restart
           distribution is weighted by (1-lamda).
    \param eps is the error tolerance for convergence; the iteration stops
           once the largest absolute change of any score drops below it.
    \param max_niter is the maximum number of allowed iterations.
    \param pr on entry stores the restart distribution of the vertices.
           This allows for the computation of personalized page-rank scores
           by appropriately setting that parameter.
           On return, pr stores the computed page ranks.

    \returns the number of iterations that were actually performed, which
             is at most max_niter (and 0 when max_niter <= 0).
*/
/**************************************************************************/
int gk_rw_PageRank(gk_csr_t *mat, float lamda, float eps, int max_niter, float *pr)
{
  ssize_t i, j, iter, niter, nrows;
  double *rscale, *prold, *prnew, *prtmp;
  double fromsinks, error, diff;
  ssize_t *rowptr;
  int *rowind;
  float *rowval;

  nrows  = mat->nrows;
  rowptr = mat->rowptr;
  rowind = mat->rowind;
  rowval = mat->rowval;

  prold  = gk_dsmalloc(nrows, 0, "gk_rw_PageRank: prold");
  prnew  = gk_dsmalloc(nrows, 0, "gk_rw_PageRank: prnew");
  rscale = gk_dsmalloc(nrows, 0, "gk_rw_PageRank: rscale");

  /* compute the scaling factors to get adjacency weights into transition
     probabilities; rows whose weights do not sum to a positive value keep
     a factor of 0 and are treated as sinks below */
  for (i=0; i<nrows; i++) {
    for (j=rowptr[i]; j<rowptr[i+1]; j++)
      rscale[i] += rowval[j];
    if (rscale[i] > 0)
      rscale[i] = 1.0/rscale[i];
  }

  /* the restart distribution is the initial pr scores */
  for (i=0; i<nrows; i++)
    prnew[i] = pr[i];

  /* get into the PR iteration */
  niter = 0;
  for (iter=0; iter<max_niter; iter++) {
    gk_SWAP(prnew, prold, prtmp);
    gk_dset(nrows, 0.0, prnew);

    /* determine the total current PR score of the sinks so that you
       can distribute them to all nodes according to the restart
       distribution. */
    for (fromsinks=0.0, i=0; i<nrows; i++) {
      if (rscale[i] == 0)
        fromsinks += prold[i];
    }

    /* push random-walk scores to the outlinks */
    for (i=0; i<nrows; i++) {
      for (j=rowptr[i]; j<rowptr[i+1]; j++)
        prnew[rowind[j]] += prold[i]*rscale[i]*rowval[j];
    }

    /* apply the restart conditions */
    for (i=0; i<nrows; i++)
      prnew[i] = lamda*(fromsinks*pr[i]+prnew[i]) + (1.0-lamda)*pr[i];

    /* compute the error as the largest absolute change of any score */
    for (error=0.0, i=0; i<nrows; i++) {
      diff = fabs(prnew[i]-prold[i]);
      if (diff > error)
        error = diff;
    }

    /* this iteration is complete, whether or not it converged */
    niter = iter+1;

    if (error < eps)
      break;
  }

  /* store the computed pr scores into pr for output */
  for (i=0; i<nrows; i++)
    pr[i] = prnew[i];

  gk_free((void **)&prnew, &prold, &rscale, LTERM);

  return (int)niter;
}

@ -0,0 +1,53 @@
#!/usr/bin/perl -w
#
# Usage: gexpand.pl <gfile> <ncopies>
#
# Reads a graph file whose first line starts with the number of vertices
# and whose every following line lists the neighbors of one vertex (the
# line for vertex 1 first, vertex ids starting at 1), and prints to stdout
# an edge list (0-based "src dst" pairs, both directions) of a graph made
# of <ncopies> stacked copies ("layers") of the input:
#   - every input edge is replicated inside each layer,
#   - every vertex is connected to its own copy in the next layer,
#   - every other adjacency entry is also connected across to the next
#     layer, alternating with the layer number.
#
use strict;
use warnings;

die "Usage $0 <gfile> <ncopies>\n" unless @ARGV == 2;

my ($filein, $ncopies) = @ARGV;

# three-argument open: the file name is never interpreted as a mode
open(my $fpin, '<', $filein) or die "Could not open $filein. $!\n";

# the header line: only the vertex count is needed
my $header = <$fpin>;
die "Could not read the header line of $filein.\n" unless defined $header;
chomp($header);
my ($nvtxs) = split(' ', $header);

my $u = 1;
while (my $line = <$fpin>) {
  chomp($line);
  my @edges = split(' ', $line);

  # put the within layer edges (each undirected edge is emitted once,
  # from its lower-numbered endpoint)
  foreach my $v (@edges) {
    next if $v < $u;
    for (my $i=0; $i<$ncopies; $i++) {
      printf("%d %d\n", $i*$nvtxs+$u-1, $i*$nvtxs+$v-1);
      printf("%d %d\n", $i*$nvtxs+$v-1, $i*$nvtxs+$u-1);
    }
  }

  # put the vertex across layer edges
  for (my $i=0; $i<$ncopies-1; $i++) {
    printf("%d %d\n", $i*$nvtxs+$u-1, ($i+1)*$nvtxs+$u-1);
    printf("%d %d\n", ($i+1)*$nvtxs+$u-1, $i*$nvtxs+$u-1);
  }

  # put the adjacent across layer edges (every other adjacency entry,
  # with the parity shifted by the layer number)
  for (my $i=0; $i<$ncopies-1; $i++) {
    my $j = 0;
    foreach my $v (@edges) {
      $j++;
      next if (($j+$i)%2 == 0);
      printf("%d %d\n", $i*$nvtxs+$u-1, ($i+1)*$nvtxs+$v-1);
      printf("%d %d\n", ($i+1)*$nvtxs+$v-1, $i*$nvtxs+$u-1);
    }
  }

  $u++;
}

close($fpin);

174
seq.c

@ -0,0 +1,174 @@
/*
*
* Sequence handler library by Huzefa Rangwala
* Date : 03.01.2007
*
*
*
*/
#include <GKlib.h>
/***********************************************************************/
/*! \brief Initializes the <tt>gk_seq_t</tt> variable

    Sets the length and symbol count to 0 and all the pointer fields that
    this file manages (sequence, pssm, psfm, name) to NULL, so that the
    structure can safely be passed to gk_seq_free().

    \param seq is a pointer to the gk_seq_t to initialize.
    \returns nothing.
*/
/***********************************************************************/
void gk_seq_init(gk_seq_t *seq)
{
  seq->len      = 0;
  seq->nsymbols = 0;    /* read by gk_seq_free(), so it must be defined */
  seq->sequence = NULL;
  seq->pssm     = NULL;
  seq->psfm     = NULL;
  seq->name     = NULL;
}
/***********************************************************************/
/*! \brief This function creates the index<->character translation tables
           for an alphabet.

    The returned structure holds two 256-entry tables that are first
    filled with -1:
      - i2c[i] is the i-th symbol of the alphabet,
      - c2i[c] is the position of symbol c in the alphabet.

    \param alphabet is the string of symbols (e.g., amino acids or
           nucleotides); it may hold at most 256 symbols.
    \returns the newly allocated gk_i2cc2i_t variable.
*/
/*********************************************************************/
gk_i2cc2i_t *gk_i2cc2i_create_common(char *alphabet)
{
  int nsymbols;
  gk_idx_t i;
  gk_i2cc2i_t *t;

  nsymbols = strlen(alphabet);

  /* both tables have 256 entries; a longer alphabet would overflow i2c */
  if (nsymbols > 256)
    errexit("gk_i2cc2i_create_common: alphabet has %d symbols (max 256)\n", nsymbols);

  t = gk_malloc(sizeof(gk_i2cc2i_t),"gk_i2c_create_common");
  t->n = nsymbols;
  t->i2c = gk_cmalloc(256, "gk_i2c_create_common");
  t->c2i = gk_imalloc(256, "gk_i2c_create_common");

  gk_cset(256, -1, t->i2c);
  gk_iset(256, -1, t->c2i);

  for (i=0; i<nsymbols; i++) {
    t->i2c[i] = alphabet[i];
    /* index through unsigned char: a plain char may be negative, which
       would write in front of the table */
    t->c2i[(unsigned char)alphabet[i]] = i;
  }

  return t;
}
/*********************************************************************/
/*! \brief This function reads a pssm in the format of gkmod pssm
\param file_name is the name of the pssm file
\returns gk_seq_t
*/
/********************************************************************/
gk_seq_t *gk_seq_ReadGKMODPSSM(char *filename)
{
gk_seq_t *seq;
gk_idx_t i, j, ii;
size_t ntokens, nbytes, len;
FILE *fpin;
gk_Tokens_t tokens;
static char *AAORDER = "ARNDCQEGHILKMFPSTWYVBZX*";
static int PSSMWIDTH = 20;
char *header, line[MAXLINELEN];
gk_i2cc2i_t *converter;
header = gk_cmalloc(PSSMWIDTH, "gk_seq_ReadGKMODPSSM: header");
converter = gk_i2cc2i_create_common(AAORDER);
gk_getfilestats(filename, &len, &ntokens, NULL, &nbytes);
len --;
seq = gk_malloc(sizeof(gk_seq_t),"gk_seq_ReadGKMODPSSM");
gk_seq_init(seq);
seq->len = len;
seq->sequence = gk_imalloc(len, "gk_seq_ReadGKMODPSSM");
seq->pssm = gk_iAllocMatrix(len, PSSMWIDTH, 0, "gk_seq_ReadGKMODPSSM");
seq->psfm = gk_iAllocMatrix(len, PSSMWIDTH, 0, "gk_seq_ReadGKMODPSSM");
seq->nsymbols = PSSMWIDTH;
seq->name = gk_getbasename(filename);
fpin = gk_fopen(filename,"r","gk_seq_ReadGKMODPSSM");
/* Read the header line */
if (fgets(line, MAXLINELEN-1, fpin) == NULL)
errexit("Unexpected end of file: %s\n", filename);
gk_strtoupper(line);
gk_strtokenize(line, " \t\n", &tokens);
for (i=0; i<PSSMWIDTH; i++)
header[i] = tokens.list[i][0];
gk_freetokenslist(&tokens);
/* Read the rest of the lines */
for (i=0, ii=0; ii<len; ii++) {
if (fgets(line, MAXLINELEN-1, fpin) == NULL)
errexit("Unexpected end of file: %s\n", filename);
gk_strtoupper(line);
gk_strtokenize(line, " \t\n", &tokens);
seq->sequence[i] = converter->c2i[(int)tokens.list[1][0]];
for (j=0; j<PSSMWIDTH; j++) {
seq->pssm[i][converter->c2i[(int)header[j]]] = atoi(tokens.list[2+j]);
seq->psfm[i][converter->c2i[(int)header[j]]] = atoi(tokens.list[2+PSSMWIDTH+j]);
}
gk_freetokenslist(&tokens);
i++;
}
seq->len = i; /* Reset the length if certain characters were skipped */
gk_free((void **)&header, LTERM);
gk_fclose(fpin);
return seq;
}
/**************************************************************************/
/*! \brief This function frees the memory allocated to the seq structure.

    Releases the pssm and psfm matrices, the name, the sequence and
    finally the gk_seq_t structure itself, so seq must not be used after
    this call.

    \param seq is the gk_seq_t to release.
    \returns nothing
*/
/**************************************************************************/
void gk_seq_free(gk_seq_t *seq)
{
  /* both matrices were sized as len x nsymbols */
  gk_iFreeMatrix(&seq->pssm, seq->len, seq->nsymbols);
  gk_iFreeMatrix(&seq->psfm, seq->len, seq->nsymbols);

  /* the members go first, the structure that owns them goes last */
  gk_free((void **)&seq->name, &seq->sequence, &seq, LTERM);
}

437
sort.c

@ -0,0 +1,437 @@
/*!
\file sort.c
\brief This file contains GKlib's various sorting routines
These routines are implemented using the GKSORT macro that is defined
in gk_qsort.h and is based on GNU's GLIBC qsort() implementation.
Additional sorting routines can be created in the same way that
these routines were defined.
\date Started 4/4/07
\author George
\version\verbatim $Id: sort.c 21050 2017-05-25 03:53:58Z karypis $ \endverbatim
*/
#include <GKlib.h>
/*************************************************************************/
/*! Sorts an array of chars in increasing order.
    \param n is the number of elements handed to GK_MKQSORT.
    \param base is the array handed to GK_MKQSORT for sorting. */
/*************************************************************************/
void gk_csorti(size_t n, char *base)
{
/* ordering test used by GK_MKQSORT: compares the values a and b point to */
#define char_lt(a, b) ((*a) < (*b))
GK_MKQSORT(char, base, n, char_lt);
#undef char_lt
}
/*************************************************************************/
/*! Sorts an array of chars in decreasing order.
    \param n is the number of elements handed to GK_MKQSORT.
    \param base is the array handed to GK_MKQSORT for sorting. */
/*************************************************************************/
void gk_csortd(size_t n, char *base)
{
/* ordering test used by GK_MKQSORT: compares the values a and b point to */
#define char_gt(a, b) ((*a) > (*b))
GK_MKQSORT(char, base, n, char_gt);
#undef char_gt
}
/*************************************************************************/
/*! Sorts an array of ints in increasing order.
    \param n is the number of elements handed to GK_MKQSORT.
    \param base is the array handed to GK_MKQSORT for sorting. */
/*************************************************************************/
void gk_isorti(size_t n, int *base)
{
/* ordering test used by GK_MKQSORT: compares the values a and b point to */
#define int_lt(a, b) ((*a) < (*b))
GK_MKQSORT(int, base, n, int_lt);
#undef int_lt
}
/*************************************************************************/
/*! Sorts an array of ints in decreasing order.
    \param n is the number of elements handed to GK_MKQSORT.
    \param base is the array handed to GK_MKQSORT for sorting. */
/*************************************************************************/
void gk_isortd(size_t n, int *base)
{
/* ordering test used by GK_MKQSORT: compares the values a and b point to */
#define int_gt(a, b) ((*a) > (*b))
GK_MKQSORT(int, base, n, int_gt);
#undef int_gt
}
/*************************************************************************/
/*! Sorts an array of int32_t values in increasing order.
    \param n is the number of elements handed to GK_MKQSORT.
    \param base is the array handed to GK_MKQSORT for sorting. */
/*************************************************************************/
void gk_i32sorti(size_t n, int32_t *base)
{
/* ordering test used by GK_MKQSORT: compares the values a and b point to */
#define int_lt(a, b) ((*a) < (*b))
GK_MKQSORT(int32_t, base, n, int_lt);
#undef int_lt
}
/*************************************************************************/
/*! Sorts an array of int32_t values in decreasing order.
    \param n is the number of elements handed to GK_MKQSORT.
    \param base is the array handed to GK_MKQSORT for sorting. */
/*************************************************************************/
void gk_i32sortd(size_t n, int32_t *base)
{
/* ordering test used by GK_MKQSORT: compares the values a and b point to */
#define int_gt(a, b) ((*a) > (*b))
GK_MKQSORT(int32_t, base, n, int_gt);
#undef int_gt
}
/*************************************************************************/
/*! Sorts an array of int64_t values in increasing order.
    \param n is the number of elements handed to GK_MKQSORT.
    \param base is the array handed to GK_MKQSORT for sorting. */
/*************************************************************************/
void gk_i64sorti(size_t n, int64_t *base)
{
/* ordering test used by GK_MKQSORT: compares the values a and b point to */
#define int_lt(a, b) ((*a) < (*b))
GK_MKQSORT(int64_t, base, n, int_lt);
#undef int_lt
}
/*************************************************************************/
/*! Sorts an array of uint32_t values in increasing order.
    \param n is the number of elements handed to GK_MKQSORT.
    \param base is the array handed to GK_MKQSORT for sorting. */
/*************************************************************************/
void gk_ui32sorti(size_t n, uint32_t *base)
{
/* ordering test used by GK_MKQSORT: compares the values a and b point to */
#define int_lt(a, b) ((*a) < (*b))
GK_MKQSORT(uint32_t, base, n, int_lt);
#undef int_lt
}
/*************************************************************************/
/*! Sorts an array of uint32_t values in decreasing order.
    \param n is the number of elements handed to GK_MKQSORT.
    \param base is the array handed to GK_MKQSORT for sorting. */
/*************************************************************************/
void gk_ui32sortd(size_t n, uint32_t *base)
{
/* ordering test used by GK_MKQSORT: compares the values a and b point to */
#define int_gt(a, b) ((*a) > (*b))
GK_MKQSORT(uint32_t, base, n, int_gt);
#undef int_gt
}
/*************************************************************************/
/*! Sorts an array of uint64_t values in increasing order.
    \param n is the number of elements handed to GK_MKQSORT.
    \param base is the array handed to GK_MKQSORT for sorting. */
/*************************************************************************/
void gk_ui64sorti(size_t n, uint64_t *base)
{
/* ordering test used by GK_MKQSORT: compares the values a and b point to */
#define int_lt(a, b) ((*a) < (*b))
GK_MKQSORT(uint64_t, base, n, int_lt);
#undef int_lt
}
/*************************************************************************/
/*! Sorts an array of uint64_t values in decreasing order.
    \param n is the number of elements handed to GK_MKQSORT.
    \param base is the array handed to GK_MKQSORT for sorting. */
/*************************************************************************/
void gk_ui64sortd(size_t n, uint64_t *base)
{
/* ordering test used by GK_MKQSORT: compares the values a and b point to */
#define int_gt(a, b) ((*a) > (*b))
GK_MKQSORT(uint64_t, base, n, int_gt);
#undef int_gt
}
/*************************************************************************/
/*! Sorts an array of int64_t values in decreasing order.
    \param n is the number of elements handed to GK_MKQSORT.
    \param base is the array handed to GK_MKQSORT for sorting. */
/*************************************************************************/
void gk_i64sortd(size_t n, int64_t *base)
{
/* ordering test used by GK_MKQSORT: compares the values a and b point to */
#define int_gt(a, b) ((*a) > (*b))
GK_MKQSORT(int64_t, base, n, int_gt);
#undef int_gt
}
/*************************************************************************/
/*! Sorts an array of floats in increasing order.
    \param n is the number of elements handed to GK_MKQSORT.
    \param base is the array handed to GK_MKQSORT for sorting. */
/*************************************************************************/
void gk_fsorti(size_t n, float *base)
{
/* ordering test used by GK_MKQSORT: compares the values a and b point to */
#define float_lt(a, b) ((*a) < (*b))
GK_MKQSORT(float, base, n, float_lt);
#undef float_lt
}
/*************************************************************************/
/*! Sorts an array of floats in decreasing order.
    \param n is the number of elements handed to GK_MKQSORT.
    \param base is the array handed to GK_MKQSORT for sorting. */
/*************************************************************************/
void gk_fsortd(size_t n, float *base)
{
/* ordering test used by GK_MKQSORT: compares the values a and b point to */
#define float_gt(a, b) ((*a) > (*b))
GK_MKQSORT(float, base, n, float_gt);
#undef float_gt
}
/*************************************************************************/
/*! Sorts an array of doubles in increasing order.
    \param n is the number of elements handed to GK_MKQSORT.
    \param base is the array handed to GK_MKQSORT for sorting. */
/*************************************************************************/
void gk_dsorti(size_t n, double *base)
{
/* ordering test used by GK_MKQSORT: compares the values a and b point to */
#define double_lt(a, b) ((*a) < (*b))
GK_MKQSORT(double, base, n, double_lt);
#undef double_lt
}
/*************************************************************************/
/*! Sorts an array of doubles in decreasing order.
    \param n is the number of elements handed to GK_MKQSORT.
    \param base is the array handed to GK_MKQSORT for sorting. */
/*************************************************************************/
void gk_dsortd(size_t n, double *base)
{
/* ordering test used by GK_MKQSORT: compares the values a and b point to */
#define double_gt(a, b) ((*a) > (*b))
GK_MKQSORT(double, base, n, double_gt);
#undef double_gt
}
/*************************************************************************/
/*! Sorts an array of gk_idx_t in increasing order.
    \param n is the number of elements handed to GK_MKQSORT.
    \param base is the array handed to GK_MKQSORT for sorting. */
/*************************************************************************/
void gk_idxsorti(size_t n, gk_idx_t *base)
{
/* ordering test used by GK_MKQSORT: compares the values a and b point to */
#define idx_lt(a, b) ((*a) < (*b))
GK_MKQSORT(gk_idx_t, base, n, idx_lt);
#undef idx_lt
}
/*************************************************************************/
/*! Sorts an array of gk_idx_t in decreasing order.
    \param n is the number of elements handed to GK_MKQSORT.
    \param base is the array handed to GK_MKQSORT for sorting. */
/*************************************************************************/
void gk_idxsortd(size_t n, gk_idx_t *base)
{
/* ordering test used by GK_MKQSORT: compares the values a and b point to */
#define idx_gt(a, b) ((*a) > (*b))
GK_MKQSORT(gk_idx_t, base, n, idx_gt);
#undef idx_gt
}
/*************************************************************************/
/*! Sorts an array of gk_ckv_t in increasing order of their key field.
    \param n is the number of elements handed to GK_MKQSORT.
    \param base is the array handed to GK_MKQSORT for sorting. */
/*************************************************************************/
void gk_ckvsorti(size_t n, gk_ckv_t *base)
{
/* ordering test used by GK_MKQSORT: only the key fields are compared */
#define ckey_lt(a, b) ((a)->key < (b)->key)
GK_MKQSORT(gk_ckv_t, base, n, ckey_lt);
#undef ckey_lt
}
/*************************************************************************/
/*! Sorts an array of gk_ckv_t in decreasing order of their key field.
    \param n is the number of elements handed to GK_MKQSORT.
    \param base is the array handed to GK_MKQSORT for sorting. */
/*************************************************************************/
void gk_ckvsortd(size_t n, gk_ckv_t *base)
{
/* ordering test used by GK_MKQSORT: only the key fields are compared */
#define ckey_gt(a, b) ((a)->key > (b)->key)
GK_MKQSORT(gk_ckv_t, base, n, ckey_gt);
#undef ckey_gt
}
/*************************************************************************/
/*! Sorts an array of gk_ikv_t in increasing order of their key field.
    \param n is the number of elements handed to GK_MKQSORT.
    \param base is the array handed to GK_MKQSORT for sorting. */
/*************************************************************************/
void gk_ikvsorti(size_t n, gk_ikv_t *base)
{
/* ordering test used by GK_MKQSORT: only the key fields are compared */
#define ikey_lt(a, b) ((a)->key < (b)->key)
GK_MKQSORT(gk_ikv_t, base, n, ikey_lt);
#undef ikey_lt
}
/*************************************************************************/
/*! Sorts an array of gk_ikv_t in decreasing order of their key field.
    \param n is the number of elements handed to GK_MKQSORT.
    \param base is the array handed to GK_MKQSORT for sorting. */
/*************************************************************************/
void gk_ikvsortd(size_t n, gk_ikv_t *base)
{
/* ordering test used by GK_MKQSORT: only the key fields are compared */
#define ikey_gt(a, b) ((a)->key > (b)->key)
GK_MKQSORT(gk_ikv_t, base, n, ikey_gt);
#undef ikey_gt
}
/*************************************************************************/
/*! Sorts an array of gk_i32kv_t in increasing order of their key field.
    \param n is the number of elements handed to GK_MKQSORT.
    \param base is the array handed to GK_MKQSORT for sorting. */
/*************************************************************************/
void gk_i32kvsorti(size_t n, gk_i32kv_t *base)
{
/* ordering test used by GK_MKQSORT: only the key fields are compared */
#define ikey_lt(a, b) ((a)->key < (b)->key)
GK_MKQSORT(gk_i32kv_t, base, n, ikey_lt);
#undef ikey_lt
}
/*************************************************************************/
/*! Sorts an array of gk_i32kv_t in decreasing order of their key field.
    \param n is the number of elements handed to GK_MKQSORT.
    \param base is the array handed to GK_MKQSORT for sorting. */
/*************************************************************************/
void gk_i32kvsortd(size_t n, gk_i32kv_t *base)
{
/* ordering test used by GK_MKQSORT: only the key fields are compared */
#define ikey_gt(a, b) ((a)->key > (b)->key)
GK_MKQSORT(gk_i32kv_t, base, n, ikey_gt);
#undef ikey_gt
}
/*************************************************************************/
/*! Sorts an array of gk_i64kv_t in increasing order of their key field.
    \param n is the number of elements handed to GK_MKQSORT.
    \param base is the array handed to GK_MKQSORT for sorting. */
/*************************************************************************/
void gk_i64kvsorti(size_t n, gk_i64kv_t *base)
{
/* ordering test used by GK_MKQSORT: only the key fields are compared */
#define ikey_lt(a, b) ((a)->key < (b)->key)
GK_MKQSORT(gk_i64kv_t, base, n, ikey_lt);
#undef ikey_lt
}
/*************************************************************************/
/*! Sorts an array of gk_i64kv_t in decreasing order of their key field.
    \param n is the number of elements handed to GK_MKQSORT.
    \param base is the array handed to GK_MKQSORT for sorting. */
/*************************************************************************/
void gk_i64kvsortd(size_t n, gk_i64kv_t *base)
{
/* ordering test used by GK_MKQSORT: only the key fields are compared */
#define ikey_gt(a, b) ((a)->key > (b)->key)
GK_MKQSORT(gk_i64kv_t, base, n, ikey_gt);
#undef ikey_gt
}
/*************************************************************************/
/*! Sorts an array of gk_zkv_t in increasing order of their key field.
    \param n is the number of elements handed to GK_MKQSORT.
    \param base is the array handed to GK_MKQSORT for sorting. */
/*************************************************************************/
void gk_zkvsorti(size_t n, gk_zkv_t *base)
{
/* ordering test used by GK_MKQSORT: only the key fields are compared */
#define zkey_lt(a, b) ((a)->key < (b)->key)
GK_MKQSORT(gk_zkv_t, base, n, zkey_lt);
#undef zkey_lt
}
/*************************************************************************/
/*! Sorts an array of gk_zkv_t in decreasing order of their key field.
    \param n is the number of elements handed to GK_MKQSORT.
    \param base is the array handed to GK_MKQSORT for sorting. */
/*************************************************************************/
void gk_zkvsortd(size_t n, gk_zkv_t *base)
{
/* ordering test used by GK_MKQSORT: only the key fields are compared */
#define zkey_gt(a, b) ((a)->key > (b)->key)
GK_MKQSORT(gk_zkv_t, base, n, zkey_gt);
#undef zkey_gt
}
/*************************************************************************/
/*! Sorts an array of gk_zukv_t in increasing order of their key field.
    \param n is the number of elements handed to GK_MKQSORT.
    \param base is the array handed to GK_MKQSORT for sorting. */
/*************************************************************************/
void gk_zukvsorti(size_t n, gk_zukv_t *base)
{
/* ordering test used by GK_MKQSORT: only the key fields are compared */
#define zukey_lt(a, b) ((a)->key < (b)->key)
GK_MKQSORT(gk_zukv_t, base, n, zukey_lt);
#undef zukey_lt
}
/*************************************************************************/
/*! Sorts an array of gk_zukv_t in decreasing order of their key field.
    \param n is the number of elements handed to GK_MKQSORT.
    \param base is the array handed to GK_MKQSORT for sorting. */
/*************************************************************************/
void gk_zukvsortd(size_t n, gk_zukv_t *base)
{
/* ordering test used by GK_MKQSORT: only the key fields are compared */
#define zukey_gt(a, b) ((a)->key > (b)->key)
GK_MKQSORT(gk_zukv_t, base, n, zukey_gt);
#undef zukey_gt
}
/*************************************************************************/
/*! Sorts an array of gk_fkv_t in increasing order of their key field.
    \param n is the number of elements handed to GK_MKQSORT.
    \param base is the array handed to GK_MKQSORT for sorting. */
/*************************************************************************/
void gk_fkvsorti(size_t n, gk_fkv_t *base)
{
/* ordering test used by GK_MKQSORT: only the key fields are compared */
#define fkey_lt(a, b) ((a)->key < (b)->key)
GK_MKQSORT(gk_fkv_t, base, n, fkey_lt);
#undef fkey_lt
}
/*************************************************************************/
/*! Sorts an array of gk_fkv_t in decreasing order of their key field.
    \param n is the number of elements handed to GK_MKQSORT.
    \param base is the array handed to GK_MKQSORT for sorting. */
/*************************************************************************/
void gk_fkvsortd(size_t n, gk_fkv_t *base)
{
/* ordering test used by GK_MKQSORT: only the key fields are compared */
#define fkey_gt(a, b) ((a)->key > (b)->key)
GK_MKQSORT(gk_fkv_t, base, n, fkey_gt);
#undef fkey_gt
}
/*************************************************************************/
/*! Sorts an array of gk_dkv_t in increasing order of their key field.
    \param n is the number of elements handed to GK_MKQSORT.
    \param base is the array handed to GK_MKQSORT for sorting. */
/*************************************************************************/
void gk_dkvsorti(size_t n, gk_dkv_t *base)
{
/* ordering test used by GK_MKQSORT: only the key fields are compared */
#define dkey_lt(a, b) ((a)->key < (b)->key)
GK_MKQSORT(gk_dkv_t, base, n, dkey_lt);
#undef dkey_lt
}
/*************************************************************************/
/*! Sorts an array of gk_dkv_t in decreasing order of their key field.
    \param n is the number of elements handed to GK_MKQSORT.
    \param base is the array handed to GK_MKQSORT for sorting. */
/*************************************************************************/
void gk_dkvsortd(size_t n, gk_dkv_t *base)
{
/* ordering test used by GK_MKQSORT: only the key fields are compared */
#define dkey_gt(a, b) ((a)->key > (b)->key)
GK_MKQSORT(gk_dkv_t, base, n, dkey_gt);
#undef dkey_gt
}
/*************************************************************************/
/*! Sorts an array of gk_skv_t in increasing order of their key field,
    where keys are compared with strcmp().
    \param n is the number of elements handed to GK_MKQSORT.
    \param base is the array handed to GK_MKQSORT for sorting. */
/*************************************************************************/
void gk_skvsorti(size_t n, gk_skv_t *base)
{
/* ordering test used by GK_MKQSORT: strcmp() on the two key strings */
#define skey_lt(a, b) (strcmp((a)->key, (b)->key) < 0)
GK_MKQSORT(gk_skv_t, base, n, skey_lt);
#undef skey_lt
}
/*************************************************************************/
/*! Sorts an array of gk_skv_t in decreasing order of their key field,
    where keys are compared with strcmp().
    \param n is the number of elements handed to GK_MKQSORT.
    \param base is the array handed to GK_MKQSORT for sorting. */
/*************************************************************************/
void gk_skvsortd(size_t n, gk_skv_t *base)
{
/* ordering test used by GK_MKQSORT: strcmp() on the two key strings */
#define skey_gt(a, b) (strcmp((a)->key, (b)->key) > 0)
GK_MKQSORT(gk_skv_t, base, n, skey_gt);
#undef skey_gt
}
/*************************************************************************/
/*! Sorts an array of gk_idxkv_t in increasing order of their key field.
    \param n is the number of elements handed to GK_MKQSORT.
    \param base is the array handed to GK_MKQSORT for sorting. */
/*************************************************************************/
void gk_idxkvsorti(size_t n, gk_idxkv_t *base)
{
/* ordering test used by GK_MKQSORT: only the key fields are compared */
#define idxkey_lt(a, b) ((a)->key < (b)->key)
GK_MKQSORT(gk_idxkv_t, base, n, idxkey_lt);
#undef idxkey_lt
}
/*************************************************************************/
/*! Sorts an array of gk_idxkv_t in decreasing order of their key field.
    \param n is the number of elements handed to GK_MKQSORT.
    \param base is the array handed to GK_MKQSORT for sorting. */
/*************************************************************************/
void gk_idxkvsortd(size_t n, gk_idxkv_t *base)
{
/* ordering test used by GK_MKQSORT: only the key fields are compared */
#define idxkey_gt(a, b) ((a)->key > (b)->key)
GK_MKQSORT(gk_idxkv_t, base, n, idxkey_gt);
#undef idxkey_gt
}

@ -0,0 +1,530 @@
/************************************************************************/
/*! \file
\brief Functions for manipulating strings.
Various functions for manipulating strings. Some of these functions
provide new functionality, whereas others are drop-in replacements
of standard functions (but with enhanced functionality).
\date Started 11/1/99
\author George
\version $Id: string.c 14330 2013-05-18 12:15:15Z karypis $
*/
/************************************************************************/
/* the following is for strptime() */
#define _XOPEN_SOURCE
#include <time.h>
#undef _XOPEN_SOURCE
#include <GKlib.h>
/************************************************************************/
/*! \brief Replaces certain characters in a string.

    Every character of \c str that appears in \c fromlist is rewritten as
    the character found at the same position of \c tolist, i.e. each
    occurrence of <tt>fromlist[i]</tt> becomes <tt>tolist[i]</tt>. When a
    character occurs more than once in \c fromlist, its first position is
    the one that counts.

    Characters of \c fromlist that have no counterpart because \c tolist
    is shorter are removed from \c str. The string is edited in place.
    The behavior is modeled after Perl's \b tr// function.

    \param str is the string whose characters will be replaced.
    \param fromlist is the set of characters to be replaced.
    \param tolist is the set of replacement characters.
    \returns A pointer to \c str itself.
*/
/************************************************************************/
char *gk_strchr_replace(char *str, char *fromlist, char *tolist)
{
  size_t nto = strlen(tolist);
  size_t pos;
  char *src, *dst, *hit;

  /* dst trails src, so characters that get deleted are simply not copied */
  for (src = dst = str; *src != '\0'; src++) {
    hit = strchr(fromlist, *src);

    if (hit == NULL) {          /* not in fromlist: keep the character */
      *dst++ = *src;
      continue;
    }

    pos = (size_t)(hit - fromlist);
    if (pos < nto)              /* has a replacement; otherwise it is dropped */
      *dst++ = tolist[pos];
  }
  *dst = '\0';

  return str;
}
/************************************************************************/
/*! \brief Regex-based search-and-replace function
This function is a C implementation of Perl's <tt> s//</tt> regular-expression
based substitution function.
\param str
is the input string on which the operation will be performed.
\param pattern
is the regular expression for the pattern to be matched for substitution.
\param replacement
is the replacement string, in which the possible captured pattern substrings
are referred to as $1, $2, ..., $9. The entire matched pattern is referred
to as $0.
\param options
is a string specifying options for the substitution operation. Currently the
<tt>"i"</tt> (case insensitive) and <tt>"g"</tt> (global substitution) are
supported.
\param new_str
is a reference to a pointer that will store a pointer to the newly created
string that results from the substitutions. This string is allocated via
gk_malloc() and needs to be freed using gk_free(). The string is returned
even if no substitutions were performed.
\returns
If successful, it returns 1 + the number of substitutions that were performed.
Thus, if no substitutions were performed, the returned value will be 1.
Otherwise it returns 0. In case of error, a meaningful error message is
returned in <tt>new_str</tt>, which also needs to be freed afterwards.
*/
/************************************************************************/
int gk_strstr_replace(char *str, char *pattern, char *replacement, char *options,
char **new_str)
{
ssize_t i, len, rlen, nlen, offset, noffset;
int j, rc, flags, global, nmatches;
regex_t re;
regmatch_t matches[10];
/* Parse the options */
flags = REG_EXTENDED;
if (strchr(options, 'i') != NULL)
flags = flags | REG_ICASE;
global = (strchr(options, 'g') != NULL ? 1 : 0);
/* Compile the regex */
if ((rc = regcomp(&re, pattern, flags)) != 0) {
len = regerror(rc, &re, NULL, 0);
*new_str = gk_cmalloc(len, "gk_strstr_replace: new_str");
regerror(rc, &re, *new_str, len);
return 0;
}
/* Prepare the output string */
len = strlen(str);
nlen = 2*len;
noffset = 0;
*new_str = gk_cmalloc(nlen+1, "gk_strstr_replace: new_str");
/* Get into the matching-replacing loop */
rlen = strlen(replacement);
offset = 0;
nmatches = 0;
do {
rc = regexec(&re, str+offset, 10, matches, 0);
if (rc == REG_ESPACE) {
gk_free((void **)new_str, LTERM);
*new_str = gk_strdup("regexec ran out of memory.");
regfree(&re);
return 0;
}
else if (rc == REG_NOMATCH) {
if (nlen-noffset < len-offset) {
nlen += (len-offset) - (nlen-noffset);
*new_str = (char *)gk_realloc(*new_str, (nlen+1)*sizeof(char), "gk_strstr_replace: new_str");
}
strcpy(*new_str+noffset, str+offset);
noffset += (len-offset);
break;
}
else { /* A match was found! */
nmatches++;
/* Copy the left unmatched portion of the string */
if (matches[0].rm_so > 0) {
if (nlen-noffset < matches[0].rm_so) {
nlen += matches[0].rm_so - (nlen-noffset);
*new_str = (char *)gk_realloc(*new_str, (nlen+1)*sizeof(char), "gk_strstr_replace: new_str");
}
strncpy(*new_str+noffset, str+offset, matches[0].rm_so);
noffset += matches[0].rm_so;
}
/* Go and append the replacement string */
for (i=0; i<rlen; i++) {
switch (replacement[i]) {
case '\\':
if (i+1 < rlen) {
if (nlen-noffset < 1) {
nlen += nlen + 1;
*new_str = (char *)gk_realloc(*new_str, (nlen+1)*sizeof(char), "gk_strstr_replace: new_str");
}
*new_str[noffset++] = replacement[++i];
}
else {
gk_free((void **)new_str, LTERM);
*new_str = gk_strdup("Error in replacement string. Missing character following '\'.");
regfree(&re);
return 0;
}
break;
case '$':
if (i+1 < rlen) {
j = (int)(replacement[++i] - '0');
if (j < 0 || j > 9) {
gk_free((void **)new_str, LTERM);
*new_str = gk_strdup("Error in captured subexpression specification.");
regfree(&re);
return 0;
}
if (nlen-noffset < matches[j].rm_eo-matches[j].rm_so) {
nlen += nlen + (matches[j].rm_eo-matches[j].rm_so);
*new_str = (char *)gk_realloc(*new_str, (nlen+1)*sizeof(char), "gk_strstr_replace: new_str");
}
strncpy(*new_str+noffset, str+offset+matches[j].rm_so, matches[j].rm_eo);
noffset += matches[j].rm_eo-matches[j].rm_so;
}
else {
gk_free((void **)new_str, LTERM);
*new_str = gk_strdup("Error in replacement string. Missing subexpression number folloing '$'.");
regfree(&re);
return 0;
}
break;
default:
if (nlen-noffset < 1) {
nlen += nlen + 1;
*new_str = (char *)gk_realloc(*new_str, (nlen+1)*sizeof(char), "gk_strstr_replace: new_str");
}
(*new_str)[noffset++] = replacement[i];
}
}
/* Update the offset of str for the next match */
offset += matches[0].rm_eo;
if (!global) {
/* Copy the right portion of the string if no 'g' option */
if (nlen-noffset < len-offset) {
nlen += (len-offset) - (nlen-noffset);
*new_str = (char *)gk_realloc(*new_str, (nlen+1)*sizeof(char), "gk_strstr_replace: new_str");
}
strcpy(*new_str+noffset, str+offset);
noffset += (len-offset);
}
}
} while (global);
(*new_str)[noffset] = '\0';
regfree(&re);
return nmatches + 1;
}
/************************************************************************/
/*! \brief Prunes characters from the end of the string.
This function removes any trailing characters that are included in the
\c rmlist. The trimming stops at the last character (i.e., first character
from the end) that is not in \c rmlist.
This function can be used to remove trailing spaces, newlines, etc.
This is a destructive operation as it modifies the string.
\param str is the string that will be trimmed.
\param rmlist contains the set of characters that will be removed.
\returns A pointer to \c str itself.
\sa gk_strhprune()
*/
/*************************************************************************/
char *gk_strtprune(char *str, char *rmlist)
{
  size_t keep = strlen(str);   /* number of leading characters to retain */

  /* shrink the retained prefix while its last character is in rmlist */
  while (keep > 0 && strchr(rmlist, str[keep-1]) != NULL)
    keep--;

  str[keep] = '\0';

  return str;
}
/************************************************************************/
/*! \brief Prunes characters from the beginning of the string.
This function removes any starting characters that are included in the
\c rmlist. The trimming stops at the first character that is not in
\c rmlist.
This function can be used to remove leading spaces, tabs, etc.
This is a destructive operation as it modifies the string.
\param str is the string that will be trimmed.
\param rmlist contains the set of characters that will be removed.
\returns A pointer to \c str itself.
\sa gk_strtprune()
*/
/*************************************************************************/
char *gk_strhprune(char *str, char *rmlist)
{
  /* length of the leading run made up only of characters from rmlist */
  size_t skip = strspn(str, rmlist);

  /* slide the remainder (terminating NUL included) to the front */
  if (skip > 0)
    memmove(str, str+skip, strlen(str+skip)+1);

  return str;
}
/************************************************************************/
/*! \brief Converts a string to upper case.
This function converts a string to upper case. This operation modifies the
string itself.
\param str is the string whose case will be changed.
\returns A pointer to \c str itself.
\sa gk_strtolower()
*/
/*************************************************************************/
char *gk_strtoupper(char *str)
{
  char *p;

  /* The <ctype.h> functions require an argument that is representable as
     unsigned char (or EOF); passing a plain char that happens to be
     negative is undefined behavior, hence the cast. A pointer is used
     instead of an int index so that very long strings are handled too. */
  for (p = str; *p != '\0'; p++)
    *p = (char)toupper((unsigned char)*p);

  return str;
}
/************************************************************************/
/*! \brief Converts a string to lower case.
This function converts a string to lower case. This operation modifies the
string itself.
\param str is the string whose case will be changed.
\returns A pointer to \c str itself.
\sa gk_strtoupper()
*/
/*************************************************************************/
char *gk_strtolower(char *str)
{
  char *p;

  /* The <ctype.h> functions require an argument that is representable as
     unsigned char (or EOF); passing a plain char that happens to be
     negative is undefined behavior, hence the cast. A pointer is used
     instead of an int index so that very long strings are handled too. */
  for (p = str; *p != '\0'; p++)
    *p = (char)tolower((unsigned char)*p);

  return str;
}
/************************************************************************/
/*! \brief Duplicates a string
This function is a replacement for C's standard <em>strdup()</em> function.
The key differences between the two are that gk_strdup():
- uses the dynamic memory allocation routines of \e GKlib.
- it correctly handles NULL input strings.
The string that is returned must be freed by gk_free().
\param orgstr is the string that will be duplicated.
\returns A pointer to the newly created string.
\sa gk_free()
*/
/*************************************************************************/
char *gk_strdup(char *orgstr)
{
  size_t nbytes;
  char *str=NULL;

  if (orgstr != NULL) {
    /* size_t rather than int: the length returned by strlen() must not be
       truncated for very long strings */
    nbytes = strlen(orgstr)+1;
    str = (char *)gk_malloc(nbytes*sizeof(char), "gk_strdup: str");

    /* the length is already known, so copy the bytes (NUL included) directly;
       the NULL test only matters if gk_malloc() can return on failure */
    if (str != NULL)
      memcpy(str, orgstr, nbytes);
  }

  return str;
}
/************************************************************************/
/*! \brief Case insensitive string comparison.
This function compares two strings for equality by ignoring the case of the
strings.
\warning This function is \b not equivalent to a case-insensitive
<em>strcmp()</em> function, as it does not return ordering
information.
\todo Remove the above warning.
\param s1 is the first string to be compared.
\param s2 is the second string to be compared.
\retval 1 if the strings are identical,
\retval 0 otherwise.
*/
/*************************************************************************/
int gk_strcasecmp(char *s1, char *s2)
{
  size_t i;

  /* Walk both strings in lock-step; this needs no separate length
     computation. The casts are required because the <ctype.h> functions
     are undefined for negative plain-char values. */
  for (i=0; s1[i] != '\0' && s2[i] != '\0'; i++) {
    if (tolower((unsigned char)s1[i]) != tolower((unsigned char)s2[i]))
      return 0;
  }

  /* equal only if both strings ended at the same position */
  return (s1[i] == '\0' && s2[i] == '\0');
}
/************************************************************************/
/*! \brief Compare two strings in reverse order
This function is similar to strcmp but it performs the comparison as
if the two strings were reversed.
\param s1 is the first string to be compared.
\param s2 is the second string to be compared.
\returns A negative value, 0, or a positive value if the reversed s1 is less than, equal to, or greater than the reversed s2, respectively.
*/
/*************************************************************************/
int gk_strrcmp(char *s1, char *s2)
{
  /* n1/n2 count the characters of each string that are still uncompared */
  int n1 = strlen(s1);
  int n2 = strlen(s2);

  /* compare from the last character towards the first */
  for (; n1 > 0 && n2 > 0; n1--, n2--) {
    if (s1[n1-1] != s2[n2-1])
      return (s1[n1-1] - s2[n2-1]);
  }

  /* one string is a suffix of the other (or they are equal):
     the shorter one orders first */
  if (n1 != n2)
    return (n1 < n2 ? -1 : 1);

  return 0;
}
/************************************************************************/
/*! \brief Converts a time_t time into a string
This function takes a time_t-specified time and returns a string-formatted
representation of the corresponding time. The format of the string is
<em>mm/dd/yyyy hh:mm:ss</em>, in which the hours are in military time.
\param time is the time to be converted.
\return It returns a pointer to a statically allocated string that is
over-written in successive calls of this function. If the
conversion failed, it returns NULL.
*/
/*************************************************************************/
char *gk_time2str(time_t time)
{
  static char datestr[128];   /* result buffer shared by all calls */
  struct tm *tm;

  /* localtime() returns NULL when the value cannot be represented as a
     broken-down time; handing that to strftime() would be undefined */
  tm = localtime(&time);
  if (tm == NULL)
    return NULL;

  if (strftime(datestr, sizeof(datestr), "%m/%d/%Y %H:%M:%S", tm) == 0)
    return NULL;

  return datestr;
}
#if !defined(WIN32) && !defined(__MINGW32__)
/************************************************************************/
/*! \brief Converts a date/time string into its equivalent time_t value
This function takes date and/or time specification and converts it in
the equivalent time_t representation. The conversion is done using the
strptime() function. The format that gk_str2time() understands is
<em>mm/dd/yyyy hh:mm:ss</em>, in which the hours are in military time.
\param str is the date/time string to be converted.
\return If the conversion was successful it returns the time, otherwise
it returns -1.
*/
/*************************************************************************/
time_t gk_str2time(char *str)
{
  struct tm parsed;
  time_t rtime;

  memset(&parsed, '\0', sizeof(parsed));

  if (strptime(str, "%m/%d/%Y %H:%M:%S", &parsed) == NULL)
    return -1;

  /* The input carries no daylight-saving information. A negative tm_isdst
     asks mktime() to determine whether DST is in effect for that local
     time; leaving it at 0 would force standard time and shift the result
     for dates that fall in a DST period. */
  parsed.tm_isdst = -1;

  rtime = mktime(&parsed);

  /* negative results (including mktime()'s error value) are reported as 0 */
  return (rtime < 0 ? 0 : rtime);
}
#endif
/*************************************************************************
* This function returns the ID of a particular string based on the
* supplied StringMap array
**************************************************************************/
int gk_GetStringID(gk_StringMap_t *strmap, char *key)
{
  gk_StringMap_t *entry;

  /* the map ends at the first entry whose name is NULL */
  for (entry = strmap; entry->name != NULL; entry++) {
    /* gk_strcasecmp() is non-zero when the two strings match */
    if (gk_strcasecmp(key, entry->name) != 0)
      return entry->id;
  }

  /* key is not present in the map */
  return -1;
}

@ -0,0 +1,19 @@
# Build the programs whose source file carries the same name as the executable
# and link each of them against GKlib.
foreach(prog strings gksort fis gkgraph csrcnv grKx m2mnbrs cmpnbrs splatt2svd gkuniq)
  add_executable(${prog} ${prog}.c)
  target_link_libraries(${prog} GKlib)
endforeach()

# gkrw is the one exception: it is built from rw.c.
add_executable(gkrw rw.c)
target_link_libraries(gkrw GKlib)

# Install a subset of them
install(TARGETS csrcnv RUNTIME DESTINATION bin)

@ -0,0 +1,301 @@
/*!
\file
\brief It takes as input two CSR matrices A and B and computes how
similar AA' and A'A are to BB' and B'B, respectively in terms
of the cosine similarity of the corresponding rows.
\date 11/09/2015
\author George
\version \verbatim $Id: m2mnbrs.c 17699 2014-09-27 18:05:31Z karypis $ \endverbatim
*/
#include <GKlib.h>
/*************************************************************************/
/*! Data structures for the code */
/*************************************************************************/
/* Run-time configuration of the program. It is allocated and filled in by
   parse_cmdline() from the command-line options and the two positional
   file arguments. */
typedef struct {
int simtype; /*!< The similarity type to use */
int verbosity; /*!< The reporting verbosity level */
char *afile; /*!< The file storing the query documents */
char *bfile; /*!< The file storing the collection documents */
/* timers */
/* wall-clock timer; cleared, started and stopped in main() around the two
   neighborhood-similarity computations */
double timer_global;
} params_t;
/*************************************************************************/
/*! Constants */
/*************************************************************************/
/* Versions */
#define VER_MAJOR 0
#define VER_MINOR 1
#define VER_SUBMINOR 0
/* Command-line option codes */
#define CMD_SIMTYPE 10
#define CMD_VERBOSITY 70
#define CMD_HELP 100
/* The text labels for the different simtypes */
/* NOTE(review): main() indexes this table directly with params->simtype, so
   the entries presumably line up with the numeric values of GK_CSR_DOTP,
   GK_CSR_COS and GK_CSR_JAC (defined outside this file) -- confirm before
   reordering or extending it. */
static char simtypenames[][10] = {"", "dotp", "cos", "jac", ""};
/*************************************************************************/
/*! Local variables */
/*************************************************************************/
/* Command-line options handed to gk_getopt_long_only() in parse_cmdline().
   The last field of each entry is the CMD_* code that parse_cmdline() switches
   on. NOTE(review): the fields presumably follow getopt_long()'s
   {name, has_arg, flag, val} layout -- confirm against gk_getopt.h.
   The all-zero entry terminates the table. */
static struct gk_option long_options[] = {
{"simtype", 1, 0, CMD_SIMTYPE},
{"verbosity", 1, 0, CMD_VERBOSITY},
{"help", 0, 0, CMD_HELP},
{0, 0, 0, 0}
};
/* Maps the textual values accepted by -simtype to their GK_CSR_* ids; looked
   up via gk_GetStringID() in parse_cmdline(). The NULL-named entry terminates
   the table. */
static gk_StringMap_t simtype_options[] = {
{"dotp", GK_CSR_DOTP},
{"cos", GK_CSR_COS},
{"jac", GK_CSR_JAC},
{NULL, 0}
};
/*-------------------------------------------------------------------
* Mini help
*-------------------------------------------------------------------*/
/* Help text printed by the -help option, one entry per output line.
   parse_cmdline() prints entries until it reaches the first empty string,
   so the final "" is the terminator and separator lines must stay " ". */
static char helpstr[][100] =
{
" ",
"Usage: cmpnbrs [options] afile bfile",
" ",
" Options",
" -simtype=string",
" Specifies the type of similarity to use. Possible values are:",
" dotp - Dot-product similarity [default]",
" cos - Cosine similarity",
" jac - Jaccard similarity",
" ",
" -verbosity=int",
" Specifies the level of debugging information to be displayed.",
" Default value is 0.",
" ",
" -help",
" Prints this message.",
""
};
/*************************************************************************/
/*! Function prototypes */
/*************************************************************************/
params_t *parse_cmdline(int argc, char *argv[]);
double ComputeNeighborhoodSimilarity(params_t *params, gk_csr_t *amat, gk_csr_t *bmat);
/*************************************************************************/
/*! This is the entry point of the command-line argument parser */
/*************************************************************************/
params_t *parse_cmdline(int argc, char *argv[])
{
int i;
int c, option_index;
params_t *params;
params = (params_t *)gk_malloc(sizeof(params_t), "parse_cmdline: params");
/* initialize the params data structure */
params->simtype = GK_CSR_DOTP;
params->verbosity = -1;
params->afile = NULL;
params->bfile = NULL;
/* Parse the command line arguments */
while ((c = gk_getopt_long_only(argc, argv, "", long_options, &option_index)) != -1) {
switch (c) {
case CMD_SIMTYPE:
if (gk_optarg) {
if ((params->simtype = gk_GetStringID(simtype_options, gk_optarg)) == -1)
errexit("Invalid simtype of %s.\n", gk_optarg);
}
break;
case CMD_VERBOSITY:
if (gk_optarg) params->verbosity = atoi(gk_optarg);
break;
case CMD_HELP:
for (i=0; strlen(helpstr[i]) > 0; i++)
printf("%s\n", helpstr[i]);
exit(EXIT_SUCCESS);
break;
case '?':
default:
printf("Illegal command-line option(s)\nUse %s -help for a summary of the options.\n", argv[0]);
exit(EXIT_FAILURE);
}
}
/* Get the input/output file info */
if (argc-gk_optind != 2) {
printf("Missing input file info.\n Use %s -help for a summary of the options.\n", argv[0]);
exit(EXIT_FAILURE);
}
params->afile = gk_strdup(argv[gk_optind++]);
params->bfile = gk_strdup(argv[gk_optind++]);
if (!gk_fexists(params->afile))
errexit("input file %s does not exist.\n", params->afile);
if (!gk_fexists(params->bfile))
errexit("input file %s does not exist.\n", params->bfile);
return params;
}
/*************************************************************************/
/*! This is the entry point of the program */
/**************************************************************************/
int main(int argc, char *argv[])
{
params_t *params;
gk_csr_t *amat, *bmat, *amatt, *bmatt;
int rc = EXIT_SUCCESS;
params = parse_cmdline(argc, argv);
amat = gk_csr_Read(params->afile, GK_CSR_FMT_CSR, 1, 0);
bmat = gk_csr_Read(params->bfile, GK_CSR_FMT_CSR, 1, 0);
/* make the matrices of similar dimensions (if neccessary) */
GKASSERT(amat->nrows == bmat->nrows);
amat->ncols = gk_max(amat->ncols, bmat->ncols);
bmat->ncols = amat->ncols;
/* create the transpose matrices */
amatt = gk_csr_Transpose(amat);
bmatt = gk_csr_Transpose(bmat);
printf("********************************************************************************\n");
printf("cmpnbrs (%d.%d.%d) Copyright 2015, GK.\n", VER_MAJOR, VER_MINOR, VER_SUBMINOR);
printf(" simtype=%s\n",
simtypenames[params->simtype]);
printf(" afile=%s, nrows=%d, ncols=%d, nnz=%zd\n",
params->afile, amat->nrows, amat->ncols, amat->rowptr[amat->nrows]);
printf(" bfile=%s, nrows=%d, ncols=%d, nnz=%zd\n",
params->bfile, bmat->nrows, bmat->ncols, bmat->rowptr[bmat->nrows]);
gk_clearwctimer(params->timer_global);
gk_startwctimer(params->timer_global);
printf("SIM(AA', BB'): %.5lf\t", ComputeNeighborhoodSimilarity(params, amat, bmat));
printf("SIM(A'A, B'B): %.5lf\n", ComputeNeighborhoodSimilarity(params, amatt, bmatt));
gk_stopwctimer(params->timer_global);
printf(" wclock: %.2lfs\n", gk_getwctimer(params->timer_global));
printf("********************************************************************************\n");
gk_csr_Free(&amat);
gk_csr_Free(&bmat);
gk_csr_Free(&amatt);
gk_csr_Free(&bmatt);
exit(rc);
}
/*************************************************************************/
/*! Compares the neighbors of AA' vs BB' */
/**************************************************************************/
/* Computes how similar the row neighborhoods of amat are to those of bmat.

   For every row iR that is non-empty in both matrices, the similarities of
   that row to the rows of its own matrix are obtained with
   gk_csr_GetSimilarRows() (using params->simtype), scattered into two dense
   vectors, and the cosine of these two vectors is accumulated. The returned
   value is the average cosine over the rows that were compared; 0.0 is
   returned when no row could be compared.

   Side effects on the inputs: when params->simtype is GK_CSR_COS the rows of
   both matrices are normalized in place, and a column index and squared row
   norms are created for both matrices.

   The loops index both dense vectors with amat's row count, so the two
   matrices are expected to have the same number of rows. */
double ComputeNeighborhoodSimilarity(params_t *params, gk_csr_t *amat,
           gk_csr_t *bmat)
{
  int iR, iH, nahits, nbhits, ncmps;
  int32_t *marker;
  gk_fkv_t *ahits, *bhits, *cand;
  double tabsim, abdot, anorm2, bnorm2, *avec, *bvec;

  /* if cosine, make rows unit length */
  if (params->simtype == GK_CSR_COS) {
    gk_csr_Normalize(amat, GK_CSR_ROW, 2);
    gk_csr_Normalize(bmat, GK_CSR_ROW, 2);
  }

  /* create the inverted index */
  gk_csr_CreateIndex(amat, GK_CSR_COL);
  gk_csr_CreateIndex(bmat, GK_CSR_COL);

  /* compute the row squared norms */
  gk_csr_ComputeSquaredNorms(amat, GK_CSR_ROW);
  gk_csr_ComputeSquaredNorms(bmat, GK_CSR_ROW);

  /* allocate memory for the necessary working arrays */
  ahits  = gk_fkvmalloc(amat->nrows, "ComputeNeighborhoodSimilarity: ahits");
  bhits  = gk_fkvmalloc(bmat->nrows, "ComputeNeighborhoodSimilarity: bhits");
  marker = gk_i32smalloc(amat->nrows, -1, "ComputeNeighborhoodSimilarity: marker");
  cand   = gk_fkvmalloc(amat->nrows, "ComputeNeighborhoodSimilarity: cand");
  avec   = gk_dsmalloc(amat->nrows, 0.0, "ComputeNeighborhoodSimilarity: avec");
  bvec   = gk_dsmalloc(bmat->nrows, 0.0, "ComputeNeighborhoodSimilarity: bvec");

  /* find the best neighbors for each row in the two matrices and compute
     the cosine similarity between them. */
  tabsim = 0.0;
  ncmps  = 0;
  for (iR=0; iR<amat->nrows; iR++) {
    if (params->verbosity > 1)
      printf("Working on row %7d\n", iR);

    /* rows that are empty in either matrix cannot be compared */
    if (amat->rowptr[iR+1]-amat->rowptr[iR] == 0 ||
        bmat->rowptr[iR+1]-bmat->rowptr[iR] == 0)
      continue;

    nahits = gk_csr_GetSimilarRows(amat,
                 amat->rowptr[iR+1]-amat->rowptr[iR],
                 amat->rowind+amat->rowptr[iR],
                 amat->rowval+amat->rowptr[iR],
                 params->simtype, amat->nrows, 0.0,
                 ahits, marker, cand);

    nbhits = gk_csr_GetSimilarRows(bmat,
                 bmat->rowptr[iR+1]-bmat->rowptr[iR],
                 bmat->rowind+bmat->rowptr[iR],
                 bmat->rowval+bmat->rowptr[iR],
                 params->simtype, bmat->nrows, 0.0,
                 bhits, marker, cand);

    if (params->verbosity > 0)
      printf("Row %7d %7d %7d %8zd %8zd\n", iR, nahits, nbhits,
          amat->rowptr[iR+1]-amat->rowptr[iR], bmat->rowptr[iR+1]-bmat->rowptr[iR]);

    /* scatter the hits into the dense vectors: .val is used as the position
       and .key as the value stored there */
    for (iH=0; iH<nahits; iH++)
      avec[ahits[iH].val] = ahits[iH].key;
    for (iH=0; iH<nbhits; iH++)
      bvec[bhits[iH].val] = bhits[iH].key;

    for (abdot=anorm2=bnorm2=0.0, iH=0; iH<amat->nrows; iH++) {
      abdot  += avec[iH]*bvec[iH];
      anorm2 += avec[iH]*avec[iH];
      bnorm2 += bvec[iH]*bvec[iH];
    }
    /* abdot > 0 implies that both norms are non-zero */
    tabsim += (abdot > 0 ? abdot/sqrt(anorm2*bnorm2) : 0.0);
    ncmps++;

    /* reset only the touched entries so the vectors are all-zero again */
    for (iH=0; iH<nahits; iH++)
      avec[ahits[iH].val] = 0.0;
    for (iH=0; iH<nbhits; iH++)
      bvec[bhits[iH].val] = 0.0;
  }

  gk_free((void **)&ahits, &bhits, &marker, &cand, &avec, &bvec, LTERM);

  /* without this guard the average would be 0.0/0 (NaN) when every row was
     skipped */
  return (ncmps > 0 ? tabsim/ncmps : 0.0);
}

@ -0,0 +1,397 @@
/*!
\file
\brief A simple program to convert between different matrix formats that are supported
by the gk_csr_Read/gk_csr_Write functions.
\date 5/30/2013
\author George
\version \verbatim $Id: csrcnv.c 15314 2013-10-05 16:50:50Z karypis $ \endverbatim
*/
#include <GKlib.h>
/*************************************************************************/
/*! Data structures for the code */
/*************************************************************************/
/* Run-time configuration of the converter. It is allocated and filled in by
   parse_cmdline(); a value of -1 in the frequency/filter fields means that
   the corresponding transformation was not requested. */
typedef struct {
int inf, outf; /* input/output format */
int numbering; /* input numbering (output when applicable) */
int readvals; /* input values (output when applicable) */
int writevals; /* output values */
int rshuf, cshuf; /* random shuffle of rows/columns */
int symmetric; /* a symmetric shuffle */
int mincolfreq; /* column pruning */
int maxcolfreq; /* column pruning */
int minrowfreq; /* row pruning */
int maxrowfreq; /* row pruning */
float rownrmfltr; /* row-lowfilter threshold */
int compactcols; /* if to renumber columns to eliminate empty ones */
int transpose; /* transpose the output matrix */
char *srenumber; /* the iperm file for the symmetric renumbering */
char *infile; /* input file */
char *outfile; /* output file */
} params_t;
/*************************************************************************/
/*! Constants */
/*************************************************************************/
#define CMD_NUMONE 1
#define CMD_NOREADVALS 2
#define CMD_NOWRITEVALS 3
#define CMD_RSHUF 4
#define CMD_CSHUF 5
#define CMD_SYMMETRIC 6
#define CMD_MINCOLFREQ 7
#define CMD_MAXCOLFREQ 8
#define CMD_MINROWFREQ 9
#define CMD_MAXROWFREQ 10
#define CMD_ROWNRMFLTR 11
#define CMD_COMPACTCOLS 12
#define CMD_TRANSPOSE 13
#define CMD_SRENUMBER 14
#define CMD_HELP 100
/*************************************************************************/
/*! Local variables */
/*************************************************************************/
/* Command-line options handed to gk_getopt_long_only() in parse_cmdline().
   The last field of each entry is the CMD_* code that parse_cmdline() switches
   on. NOTE(review): the fields presumably follow getopt_long()'s
   {name, has_arg, flag, val} layout -- confirm against gk_getopt.h.
   The all-zero entry terminates the table. */
static struct gk_option long_options[] = {
{"numone", 0, 0, CMD_NUMONE},
{"noreadvals", 0, 0, CMD_NOREADVALS},
{"nowritevals", 0, 0, CMD_NOWRITEVALS},
{"rshuf", 0, 0, CMD_RSHUF},
{"cshuf", 0, 0, CMD_CSHUF},
{"symmetric", 0, 0, CMD_SYMMETRIC},
{"mincolfreq", 1, 0, CMD_MINCOLFREQ},
{"maxcolfreq", 1, 0, CMD_MAXCOLFREQ},
{"minrowfreq", 1, 0, CMD_MINROWFREQ},
{"maxrowfreq", 1, 0, CMD_MAXROWFREQ},
{"rownrmfltr", 1, 0, CMD_ROWNRMFLTR},
{"compactcols", 0, 0, CMD_COMPACTCOLS},
{"transpose", 0, 0, CMD_TRANSPOSE},
{"srenumber", 1, 0, CMD_SRENUMBER},
{"help", 0, 0, CMD_HELP},
{0, 0, 0, 0}
};
/*-------------------------------------------------------------------*/
/* Mini help */
/*-------------------------------------------------------------------*/
/* Full help text printed by the -help option, one entry per output line.
   parse_cmdline() prints entries until it reaches the first empty string,
   so the final "" is the terminator and separator lines must stay " ".
   The program is built and installed as "csrcnv" (see test/CMakeLists.txt),
   which is the name the usage lines must show. */
static char helpstr[][100] = {
" ",
"Usage: csrcnv [options] <infile> <inf> <outfile> <outf>",
" ",
" Required parameters",
" infile, outfile",
" The name of the input/output CSR file.",
" ",
" inf/outf",
" The format of the input/output file.",
" Supported values are:",
" 1 GK_CSR_FMT_CLUTO",
" 2 GK_CSR_FMT_CSR",
" 3 GK_CSR_FMT_METIS",
" 4 GK_CSR_FMT_BINROW",
" 6 GK_CSR_FMT_IJV",
" 7 GK_CSR_FMT_BIJV",
" ",
" Optional parameters",
" -numone",
" Specifies that the numbering of the input file starts from 1. ",
" It only applies to CSR/IJV formats.",
" ",
" -nowritevals",
" Specifies that no values will be output.",
" ",
" -noreadvals",
" Specifies that the values will not be read when applicable.",
" ",
" -rshuf",
" Specifies that the rows will be randomly shuffled prior to output.",
" ",
" -cshuf",
" Specifies that the columns will be randomly shuffled prior to output.",
" ",
" -symmetric",
" Specifies that the row+column shuffling will be symmetric.",
" ",
" -mincolfreq=int",
" Used to prune infrequent columns.",
" ",
" -maxcolfreq=int",
" Used to prune frequent columns.",
" ",
" -minrowfreq=int",
" Used to prune infrequent rows.",
" ",
" -maxrowfreq=int",
" Used to prune frequent rows.",
" ",
" -rownrmfltr=float",
" The parameter to use for the row-wise low filter.",
" ",
" -compactcols",
" Specifies if empty columns will be removed and the columns renumbered.",
" ",
" -transpose",
" Specifies that the transposed matrix will be written.",
" ",
" -srenumber=iperm-file",
" Performs a symmetric renumbering based on the provided iperm file.",
" ",
" -help",
" Prints this message.",
""
};
/* Short usage reminder printed by parse_cmdline() when the number of
   positional arguments is wrong; terminated by an empty string as well. */
static char shorthelpstr[][100] = {
" ",
" Usage: csrcnv [options] <infile> <inf> <outfile> <outf>",
" use 'csrcnv -help' for a summary of the options.",
""
};
/*************************************************************************/
/*! This is the entry point of the command-line argument parser */
/*************************************************************************/
/* Parses the command line into a freshly allocated params_t.

   Options are read with gk_getopt_long_only(); afterwards exactly four
   positional arguments are required: <infile> <inf> <outfile> <outf>.
   -help prints the help text and exits successfully. An illegal option or a
   wrong number of positional arguments prints a message and exits with
   EXIT_FAILURE, so that calling scripts can detect the misuse. A missing
   input file or srenumber file is reported through errexit(). */
params_t *parse_cmdline(int argc, char *argv[])
{
  int i;
  int c, option_index;
  params_t *params;

  params = (params_t *)gk_malloc(sizeof(params_t), "parse_cmdline: params");

  /* initialize the params data structure */
  params->numbering   = 0;
  params->readvals    = 1;
  params->writevals   = 1;
  params->rshuf       = 0;
  params->cshuf       = 0;
  params->symmetric   = 0;
  params->transpose   = 0;
  params->srenumber   = NULL;

  /* -1 marks the pruning/filtering parameters as "not requested" */
  params->mincolfreq  = -1;
  params->minrowfreq  = -1;
  params->maxcolfreq  = -1;
  params->maxrowfreq  = -1;
  params->rownrmfltr  = -1;
  params->compactcols = 0;

  params->inf         = -1;
  params->outf        = -1;
  params->infile      = NULL;
  params->outfile     = NULL;

  /* Parse the command line arguments */
  while ((c = gk_getopt_long_only(argc, argv, "", long_options, &option_index)) != -1) {
    switch (c) {
      case CMD_NUMONE:
        params->numbering = 1;
        break;
      case CMD_NOREADVALS:
        params->readvals = 0;
        break;
      case CMD_NOWRITEVALS:
        params->writevals = 0;
        break;
      case CMD_RSHUF:
        params->rshuf = 1;
        break;
      case CMD_CSHUF:
        params->cshuf = 1;
        break;
      case CMD_SYMMETRIC:
        params->symmetric = 1;
        break;
      case CMD_TRANSPOSE:
        params->transpose = 1;
        break;

      case CMD_MINCOLFREQ:
        if (gk_optarg) params->mincolfreq = atoi(gk_optarg);
        break;
      case CMD_MINROWFREQ:
        if (gk_optarg) params->minrowfreq = atoi(gk_optarg);
        break;
      case CMD_MAXCOLFREQ:
        if (gk_optarg) params->maxcolfreq = atoi(gk_optarg);
        break;
      case CMD_MAXROWFREQ:
        if (gk_optarg) params->maxrowfreq = atoi(gk_optarg);
        break;
      case CMD_ROWNRMFLTR:
        if (gk_optarg) params->rownrmfltr = atof(gk_optarg);
        break;
      case CMD_COMPACTCOLS:
        params->compactcols = 1;
        break;

      case CMD_SRENUMBER:
        if (gk_optarg) {
          params->srenumber = gk_strdup(gk_optarg);
          if (!gk_fexists(params->srenumber))
            errexit("srenumber file %s does not exist.\n", params->srenumber);
        }
        break;

      case CMD_HELP:
        /* the help text ends at its first empty line */
        for (i=0; strlen(helpstr[i]) > 0; i++)
          printf("%s\n", helpstr[i]);
        exit(EXIT_SUCCESS);
        break;

      case '?':
      default:
        printf("Illegal command-line option(s)\nUse %s -help for a summary of the options.\n", argv[0]);
        exit(EXIT_FAILURE);
    }
  }

  /* <infile> <inf> <outfile> <outf> must remain */
  if (argc-gk_optind != 4) {
    printf("Unrecognized parameters.");
    for (i=0; strlen(shorthelpstr[i]) > 0; i++)
      printf("%s\n", shorthelpstr[i]);
    exit(EXIT_FAILURE);
  }

  params->infile  = gk_strdup(argv[gk_optind++]);
  params->inf     = atoi(argv[gk_optind++]);
  params->outfile = gk_strdup(argv[gk_optind++]);
  params->outf    = atoi(argv[gk_optind++]);

  if (!gk_fexists(params->infile))
    errexit("input file %s does not exist.\n", params->infile);

  return params;
}
/*************************************************************************/
/*! the entry point */
/**************************************************************************/
/* Reads the input matrix, applies the requested transformations in a fixed
   order (column pruning, row pruning, row low-filter, column compaction,
   random shuffling, symmetric renumbering, transposition) and writes the
   result in the requested output format. */
int main(int argc, char *argv[])
{
  int what;
  params_t *params;
  gk_csr_t *mat, *mat1, *smat;

  /* get command-line options */
  params = parse_cmdline(argc, argv);

  /* read the data */
  mat = gk_csr_Read(params->infile, params->inf, params->readvals, params->numbering);

  /* deal with weird transformations */
  if (params->mincolfreq != -1 || params->maxcolfreq != -1) {
    /* an unspecified bound defaults to the widest possible value */
    params->mincolfreq = (params->mincolfreq == -1 ? 0 : params->mincolfreq);
    params->maxcolfreq = (params->maxcolfreq == -1 ? mat->nrows : params->maxcolfreq);

    printf("Column prune: %d %d; nnz: %zd => ",
        params->mincolfreq, params->maxcolfreq, mat->rowptr[mat->nrows]);

    mat1 = gk_csr_Prune(mat, GK_CSR_COL, params->mincolfreq, params->maxcolfreq);
    gk_csr_Free(&mat);
    mat = mat1;
    mat1 = NULL;

    printf("%zd\n", mat->rowptr[mat->nrows]);
  }

  if (params->minrowfreq != -1 || params->maxrowfreq != -1) {
    params->minrowfreq = (params->minrowfreq == -1 ? 0 : params->minrowfreq);
    params->maxrowfreq = (params->maxrowfreq == -1 ? mat->ncols : params->maxrowfreq);

    printf("Row prune: %d %d; nnz: %zd => ",
        params->minrowfreq, params->maxrowfreq, mat->rowptr[mat->nrows]);

    mat1 = gk_csr_Prune(mat, GK_CSR_ROW, params->minrowfreq, params->maxrowfreq);
    gk_csr_Free(&mat);
    mat = mat1;
    mat1 = NULL;

    printf("%zd\n", mat->rowptr[mat->nrows]);
  }

  if (params->rownrmfltr >= 0.0) {
    //gk_csr_Scale(mat, GK_CSR_LOG);
    //gk_csr_Scale(mat, GK_CSR_IDF2);

    printf("Row low filter: %f; nnz: %zd => ", params->rownrmfltr, mat->rowptr[mat->nrows]);

    mat1 = gk_csr_LowFilter(mat, GK_CSR_ROW, 2, params->rownrmfltr);
    gk_csr_Normalize(mat1, GK_CSR_ROW, 2);

    gk_csr_Free(&mat);
    mat = mat1;
    mat1 = NULL;

    printf("%zd\n", mat->rowptr[mat->nrows]);
  }

  if (params->compactcols) {
    printf("Compacting columns: %d => ", mat->ncols);
    gk_csr_CompactColumns(mat);
    printf("%d\n", mat->ncols);
  }

  if (params->rshuf || params->cshuf) {
    if (params->rshuf && params->cshuf)
      what = GK_CSR_ROWCOL;
    else if (params->rshuf)
      what = GK_CSR_ROW;
    else
      what = GK_CSR_COL;

    smat = gk_csr_Shuffle(mat, what, params->symmetric);
    gk_csr_Free(&mat);
    mat = smat;
  }

  if (params->srenumber) {
    size_t i, nlines;
    int32_t imin, imax;
    int32_t *iperm;

    iperm = gk_i32readfile(params->srenumber, &nlines);
    if (nlines != mat->nrows && nlines != mat->ncols)
      errexit("The nlines=%zu of srenumber file does not match nrows: %d, ncols: %d\n", nlines, mat->nrows, mat->ncols);

    /* The permutation may be 0-based (values in [0, nlines-1]) or 1-based
       (values in [1, nlines]). Anything negative, anything above nlines, or
       a range that contains both 0 and nlines cannot be either. */
    imin = gk_i32min(nlines, iperm, 1);
    imax = gk_i32max(nlines, iperm, 1);
    if (imin < 0 || imax > (int32_t)nlines || (imin == 0 && imax == (int32_t)nlines))
      errexit("The srenumber iperm seems to be wrong.\n");

    if (imax == (int32_t)nlines) { /* 1-based: need to renumber */
      for (i=0; i<nlines; i++)
        iperm[i]--;
    }

    smat = gk_csr_ReorderSymmetric(mat, iperm, NULL);
    gk_csr_Free(&mat);
    mat = smat;

    gk_free((void **)&iperm, LTERM);
  }

  /* values were requested on output but the matrix has none: write 1.0s */
  if (params->writevals && mat->rowval == NULL)
    mat->rowval = gk_fsmalloc(mat->rowptr[mat->nrows], 1.0, "mat->rowval");

  if (params->transpose) {
    mat1 = gk_csr_Transpose(mat);
    gk_csr_Free(&mat);
    mat = mat1;
    mat1 = NULL;
  }

  gk_csr_Write(mat, params->outfile, params->outf, params->writevals, 0);

  gk_csr_Free(&mat);

  return EXIT_SUCCESS;
}

@ -0,0 +1,286 @@
/*!
\file
\brief A simple frequent itemset discovery program to test GKlib's routines
\date 6/12/2008
\author George
\version \verbatim $Id: fis.c 11075 2011-11-11 22:31:52Z karypis $ \endverbatim
*/
#include <GKlib.h>
/*************************************************************************/
/*! Data structures for the code */
/*************************************************************************/
/* Run-time configuration and state of the fis program. The descriptions of
   the option-backed fields follow the program's own help text. */
typedef struct {
ssize_t minlen, maxlen; /* minimum/maximum length of the patterns (-minlen, -maxlen) */
ssize_t minfreq, maxfreq; /* minimum/maximum frequency of the patterns (-minfreq, -maxfreq) */
char *filename; /* the transaction file; read by main() in Cluto's format */
int silent; /* -silent: do not print the discovered itemsets */
ssize_t nitemsets; /* reset to 0 by main(); presumably the number of itemsets found -- confirm */
char *clabelfile; /* -clabels: file with the column labels; NULL if not provided */
char **clabels; /* one label string per matrix column; allocated in main() */
} params_t;
/*************************************************************************/
/*! Constants */
/*************************************************************************/
#define CMD_MINLEN 1
#define CMD_MAXLEN 2
#define CMD_MINFREQ 3
#define CMD_MAXFREQ 4
#define CMD_SILENT 5
#define CMD_CLABELFILE 6
#define CMD_HELP 10
/*************************************************************************/
/*! Local variables */
/*************************************************************************/
/* Command-line options of the program; the last field of each entry is its
   CMD_* code. NOTE(review): the fields presumably follow getopt_long()'s
   {name, has_arg, flag, val} layout -- confirm against gk_getopt.h.
   The all-zero entry terminates the table. */
static struct gk_option long_options[] = {
{"minlen", 1, 0, CMD_MINLEN},
{"maxlen", 1, 0, CMD_MAXLEN},
{"minfreq", 1, 0, CMD_MINFREQ},
{"maxfreq", 1, 0, CMD_MAXFREQ},
{"silent", 0, 0, CMD_SILENT},
{"clabels", 1, 0, CMD_CLABELFILE},
{"help", 0, 0, CMD_HELP},
{0, 0, 0, 0}
};
/*-------------------------------------------------------------------*/
/* Mini help */
/*-------------------------------------------------------------------*/
/* Full help text printed by parse_cmdline() for -help. Every row is a fixed
   100-byte buffer, so a line must stay below 100 characters. The printing
   loop stops at the first empty string, so "" must remain the last entry and
   "blank" lines have to be written as " ". */
static char helpstr[][100] = {
" ",
"Usage: fis [options] <mat-file>",
" ",
" Required parameters",
" mat-file",
" The name of the file storing the transactions. The file is in ",
" Cluto's .mat format.",
" ",
" Optional parameters",
" -minlen=int",
" Specifies the minimum length of the patterns. [default: 1]",
" ",
" -maxlen=int",
" Specifies the maximum length of the patterns. [default: none]",
" ",
" -minfreq=int",
" Specifies the minimum frequency of the patterns. [default: 10]",
" ",
" -maxfreq=int",
" Specifies the maximum frequency of the patterns. [default: none]",
" ",
" -silent",
" Does not print the discovered itemsets.",
" ",
" -clabels=filename",
" Specifies the name of the file that stores the column labels.",
" ",
" -help",
" Prints this message.",
""
};
/* Short usage reminder printed by parse_cmdline() when the number of
   positional arguments is wrong; same ""-terminated layout as helpstr. */
static char shorthelpstr[][100] = {
" ",
" Usage: fis [options] <mat-file>",
" use 'fis -help' for a summary of the options.",
""
};
/*************************************************************************/
/*! Function prototypes */
/*************************************************************************/
void print_init_info(params_t *params, gk_csr_t *mat);
void print_final_info(params_t *params);
params_t *parse_cmdline(int argc, char *argv[]);
void print_an_itemset(void *stateptr, int nitems, int *itemind,
int ntrans, int *tranind);
/*************************************************************************/
/*! Entry point of the fis test driver.

    Parses the command line, reads the transaction matrix in Cluto format,
    builds one label per column (the column number, or a line of the
    -clabels file), and runs gk_find_frequent_itemsets() with
    print_an_itemset() as the per-itemset callback.
*/
/**************************************************************************/
int main(int argc, char *argv[])
{
  char buf[8192];
  ssize_t col, ncols;
  FILE *labelfp;
  gk_csr_t *mat;
  params_t *params = parse_cmdline(argc, argv);

  params->nitemsets = 0;

  /* load the transactions and build the column-based index */
  mat = gk_csr_Read(params->filename, GK_CSR_FMT_CLUTO, 1, 1);
  gk_csr_CreateIndex(mat, GK_CSR_COL);
  ncols = mat->ncols;

  /* one label per column */
  params->clabels = (char **)gk_malloc(ncols*sizeof(char *), "main: clabels");

  if (params->clabelfile != NULL) {
    /* labels come from the file, one per line, trailing blanks removed */
    labelfp = gk_fopen(params->clabelfile, "r", "main: fpin");
    for (col=0; col<ncols; col++) {
      if (fgets(buf, (int)sizeof(buf), labelfp) == NULL)
        errexit("Failed on fgets.\n");
      params->clabels[col] = gk_strdup(gk_strtprune(buf, " \n\t"));
    }
    gk_fclose(labelfp);
  }
  else {
    /* no label file: use the column number as the label */
    for (col=0; col<ncols; col++) {
      sprintf(buf, "%zd", col);
      params->clabels[col] = gk_strdup(buf);
    }
  }

  print_init_info(params, mat);

  gk_find_frequent_itemsets(mat->nrows, mat->rowptr, mat->rowind,
      params->minfreq, params->maxfreq, params->minlen, params->maxlen,
      &print_an_itemset, (void *)params);

  printf("Total itemsets found: %zd\n", params->nitemsets);

  print_final_info(params);

  return 0;
}
/*************************************************************************/
/*! Prints the start-up banner, the dimensions of the input matrix and the
    options the run will use.

    \param params the parsed command-line options.
    \param mat the transaction matrix; nrows, ncols and rowptr[nrows] are
           printed.
*/
/*************************************************************************/
void print_init_info(params_t *params, gk_csr_t *mat)
{
  printf("*******************************************************************************\n");
  printf(" fis\n\n");
  printf("Matrix Information ---------------------------------------------------------\n");
  printf(" input file=%s, [%d, %d, %zd]\n",
      params->filename, mat->nrows, mat->ncols, mat->rowptr[mat->nrows]);
  printf("\n");
  printf("Options --------------------------------------------------------------------\n");
  /* the third value is minfreq; the label used to be misspelled "minfeq" */
  printf(" minlen=%zd, maxlen=%zd, minfreq=%zd, maxfreq=%zd\n",
      params->minlen, params->maxlen, params->minfreq, params->maxfreq);
  printf("\n");
  printf("Finding patterns... -----------------------------------------------------\n");
}
/*************************************************************************/
/*! Prints the closing report: the values returned by gk_GetMaxMemoryUsed()
    and gk_GetCurMemoryUsed().

    \param params unused; kept for symmetry with print_init_info().
*/
/*************************************************************************/
void print_final_info(params_t *params)
{
  ssize_t maxmem, curmem;

  fputs("\n", stdout);
  fputs("Memory Usage Information -----------------------------------------------------\n", stdout);

  maxmem = (ssize_t) gk_GetMaxMemoryUsed();
  printf(" Maximum memory used: %10zd bytes\n", maxmem);

  curmem = (ssize_t) gk_GetCurMemoryUsed();
  printf(" Current memory used: %10zd bytes\n", curmem);

  fputs("********************************************************************************\n", stdout);
}
/*************************************************************************/
/*! This is the entry point of the command-line argument parser */
/*************************************************************************/
params_t *parse_cmdline(int argc, char *argv[])
{
int i;
int c, option_index;
params_t *params;
params = (params_t *)gk_malloc(sizeof(params_t), "parse_cmdline: params");
/* initialize the params data structure */
params->minlen = 1;
params->maxlen = -1;
params->minfreq = 10;
params->maxfreq = -1;
params->silent = 0;
params->filename = NULL;
params->clabelfile = NULL;
/* Parse the command line arguments */
while ((c = gk_getopt_long_only(argc, argv, "", long_options, &option_index)) != -1) {
switch (c) {
case CMD_MINLEN:
if (gk_optarg) params->minlen = atoi(gk_optarg);
break;
case CMD_MAXLEN:
if (gk_optarg) params->maxlen = atoi(gk_optarg);
break;
case CMD_MINFREQ:
if (gk_optarg) params->minfreq = atoi(gk_optarg);
break;
case CMD_MAXFREQ:
if (gk_optarg) params->maxfreq = atoi(gk_optarg);
break;
case CMD_SILENT:
params->silent = 1;
break;
case CMD_CLABELFILE:
if (gk_optarg) params->clabelfile = gk_strdup(gk_optarg);
break;
case CMD_HELP:
for (i=0; strlen(helpstr[i]) > 0; i++)
printf("%s\n", helpstr[i]);
exit(0);
break;
case '?':
default:
printf("Illegal command-line option(s)\nUse %s -help for a summary of the options.\n", argv[0]);
exit(0);
}
}
if (argc-gk_optind != 1) {
printf("Unrecognized parameters.");
for (i=0; strlen(shorthelpstr[i]) > 0; i++)
printf("%s\n", shorthelpstr[i]);
exit(0);
}
params->filename = gk_strdup(argv[gk_optind++]);
if (!gk_fexists(params->filename))
errexit("input file %s does not exist.\n", params->filename);
return params;
}
/*************************************************************************/
/*! This is the callback function for the itemset discovery routine */
/*************************************************************************/
void print_an_itemset(void *stateptr, int nitems, int *itemids, int ntrans,
int *transids)
{
ssize_t i;
params_t *params;
params = (params_t *)stateptr;
params->nitemsets++;
if (!params->silent) {
printf("%4zd %4d %4d => ", params->nitemsets, nitems, ntrans);
for (i=0; i<nitems; i++)
printf(" %s", params->clabels[itemids[i]]);
printf("\n");
for (i=0; i<ntrans; i++)
printf(" %d\n", transids[i]);
printf("\n");
}
}

@ -0,0 +1,845 @@
/*!
\file
\brief A simple program to try out some graph routines
\date 6/12/2008
\author George
\version \verbatim $Id: gkgraph.c 17700 2014-09-27 18:10:02Z karypis $ \endverbatim
*/
#include <GKlib.h>
/*************************************************************************/
/*! Data structures for the code */
/*************************************************************************/
/* Run-time options of the gkgraph driver, filled in by parse_cmdline(). */
typedef struct {
int lnbits; /* -lnbits: passed as the second argument of gk_cacheCreate(); default 6 */
int cnbits; /* -cnbits: passed as the third argument of gk_cacheCreate(); default 13 */
int type; /* -type: parsed and printed; not otherwise used in this file's visible code */
int niter; /* -niter: number of label-propagation sweeps done by the reorder_*lpn* routines */
float eps; /* -eps: parsed and printed only */
float lamda; /* -lamda: parsed and printed only */
int nosort; /* -nosort: non-zero makes sort_adjacencies() return without sorting */
int write; /* -write: non-zero makes test_spmv() write each graph it evaluates */
char *infile; /* the <graph-file> positional argument */
char *outfile; /* output name set by parse_cmdline() */
} params_t;
/*************************************************************************/
/*! Constants */
/*************************************************************************/
/* Option identifiers: the value stored in the last field of each long_options
   entry and used as the case labels of the switch in parse_cmdline(). */
#define CMD_NITER 1
#define CMD_EPS 2
#define CMD_LAMDA 3
#define CMD_TYPE 4
#define CMD_NOSORT 5
#define CMD_WRITE 6
#define CMD_LNBITS 7
#define CMD_CNBITS 8
#define CMD_HELP 10
/* NOTE(review): the three constants below are not referenced by any of the
   routines of this program that were reviewed -- confirm before removing. */
#define CLINE32 16
#define CLINE64 8
#define MAXRCLOCKSPAN (1<<20)
/*************************************************************************/
/*! Local variables */
/*************************************************************************/
/* Table handed to gk_getopt_long_only().
   NOTE(review): the field layout {name, has-argument, flag, id} is assumed to
   mirror getopt's struct option -- confirm against gk_getopt.h. */
static struct gk_option long_options[] = {
{"lnbits", 1, 0, CMD_LNBITS},
{"cnbits", 1, 0, CMD_CNBITS},
{"type", 1, 0, CMD_TYPE},
{"niter", 1, 0, CMD_NITER},
{"lamda", 1, 0, CMD_LAMDA},
{"eps", 1, 0, CMD_EPS},
{"nosort", 0, 0, CMD_NOSORT},
{"write", 0, 0, CMD_WRITE},
{"help", 0, 0, CMD_HELP},
{0, 0, 0, 0} /* all-zero entry closes the table */
};
/*-------------------------------------------------------------------*/
/* Mini help */
/*-------------------------------------------------------------------*/
static char helpstr[][100] = {
" ",
"Usage: gkgraph [options] <graph-file> [<out-file>]",
" ",
" Required parameters",
" graph-file",
" The name of the file storing the graph. The file is in ",
" Metis' graph format.",
" ",
" Optional parameters",
" -niter=int",
" Specifies the maximum number of iterations. [default: 100]",
" ",
" -lnbits=int",
" Specifies the number of address bits indexing the cacheline. [default: 6]",
" ",
" -cnbits=int",
" Specifies the number of address bits indexing the cache. [default: 13]",
" ",
" -lamda=float",
" Specifies the follow-the-adjacent-links probability. [default: 0.80]",
" ",
" -eps=float",
" Specifies the error tollerance. [default: 1e-10]",
" ",
" -nosort",
" Does not sort the adjacency lists.",
" ",
" -write",
" Output the reordered graphs.",
" ",
" -help",
" Prints this message.",
""
};
static char shorthelpstr[][100] = {
" ",
" Usage: gkgraph [options] <graph-file> [<out-file>]",
" use 'gkgraph -help' for a summary of the options.",
""
};
/*************************************************************************/
/*! Function prototypes */
/*************************************************************************/
void test_spmv(params_t *params);
void test_tc(params_t *params);
void sort_adjacencies(params_t *params, gk_graph_t *graph);
double compute_spmvstats(params_t *params, gk_graph_t *graph);
double compute_tcstats(params_t *params, gk_graph_t *graph, int32_t *iperm);
int32_t *reorder_degrees(params_t *params, gk_graph_t *graph);
int32_t *reorder_freqlpn(params_t *params, gk_graph_t *graph);
int32_t *reorder_freqlpn_db(params_t *params, gk_graph_t *graph);
int32_t *reorder_minlpn(params_t *params, gk_graph_t *graph);
int32_t *reorder_minlpn_db(params_t *params, gk_graph_t *graph);
void print_init_info(params_t *params, gk_graph_t *graph);
void print_final_info(params_t *params);
params_t *parse_cmdline(int argc, char *argv[]);
/*************************************************************************/
/*! Entry point of the gkgraph test driver: parses the command line and
    runs the triangle-counting experiments (test_tc()). test_spmv() is not
    called from here.
*/
/**************************************************************************/
int main(int argc, char *argv[])
{
  /* get command-line options and hand them to the experiment driver */
  test_tc(parse_cmdline(argc, argv));

  return 0;
}
/*************************************************************************/
/*! Runs the SPMV cache experiments.

    Reads the graph named by params->infile and reports the hit rate
    returned by compute_spmvstats() for the input ordering and then for each
    of the BFS, degree, freqlpn, freqlpn-db, minlpn and minlpn-db
    reorderings. With params->write set, every evaluated graph is also
    written in IJV format.

    The statement order is deliberate: compute_spmvstats() feeds real heap
    addresses to the cache simulator and the orderings consume random
    numbers, so reordering allocations or calls may change the numbers
    that are reported.
*/
/**************************************************************************/
void test_spmv(params_t *params)
{
ssize_t i, j, v;
gk_graph_t *graph, *pgraph;
int32_t *perm;
/* read the data */
graph = gk_graph_Read(params->infile, GK_GRAPH_FMT_METIS, -1, -1, 0, 0, 0);
/* display some basic stats */
print_init_info(params, graph);
/* baseline: the graph in its input ordering */
sort_adjacencies(params, graph);
if (params->write) gk_graph_Write(graph, "original.ijv", GK_GRAPH_FMT_IJV, 1);
printf("Input SPMV HitRate: %.4lf\n", compute_spmvstats(params, graph));
/* BFS ordering rooted at a randomly selected vertex */
v = RandomInRange(graph->nvtxs);
gk_graph_ComputeBFSOrdering(graph, v, &perm, NULL);
pgraph = gk_graph_Reorder(graph, perm, NULL);
sort_adjacencies(params, pgraph);
if (params->write) gk_graph_Write(pgraph, "bfs.ijv", GK_GRAPH_FMT_IJV, 1);
printf("BFS SPMV HitRate: %.4lf\n", compute_spmvstats(params, pgraph));
gk_graph_Free(&pgraph);
gk_free((void **)&perm, LTERM);
/* degree-based ordering */
perm = reorder_degrees(params, graph);
pgraph = gk_graph_Reorder(graph, perm, NULL);
sort_adjacencies(params, pgraph);
if (params->write) gk_graph_Write(pgraph, "degrees.ijv", GK_GRAPH_FMT_IJV, 1);
printf("Degrees SPMV HitRate: %.4lf\n", compute_spmvstats(params, pgraph));
gk_graph_Free(&pgraph);
gk_free((void **)&perm, LTERM);
/* most-frequent-label propagation ordering */
perm = reorder_freqlpn(params, graph);
pgraph = gk_graph_Reorder(graph, perm, NULL);
sort_adjacencies(params, pgraph);
if (params->write) gk_graph_Write(pgraph, "freqlpn.ijv", GK_GRAPH_FMT_IJV, 1);
printf("FreqLabelPropN SPMV HitRate: %.4lf\n", compute_spmvstats(params, pgraph));
gk_graph_Free(&pgraph);
gk_free((void **)&perm, LTERM);
/* most-frequent-label propagation restricted to equal degree buckets */
perm = reorder_freqlpn_db(params, graph);
pgraph = gk_graph_Reorder(graph, perm, NULL);
sort_adjacencies(params, pgraph);
if (params->write) gk_graph_Write(pgraph, "freqlpn-db.ijv", GK_GRAPH_FMT_IJV, 1);
printf("DBFreqLabelPropN SPMV HitRate: %.4lf\n", compute_spmvstats(params, pgraph));
gk_graph_Free(&pgraph);
gk_free((void **)&perm, LTERM);
/* min-label propagation ordering */
perm = reorder_minlpn(params, graph);
pgraph = gk_graph_Reorder(graph, perm, NULL);
sort_adjacencies(params, pgraph);
if (params->write) gk_graph_Write(pgraph, "minlpn.ijv", GK_GRAPH_FMT_IJV, 1);
printf("MinLabelPropN SPMV HitRate: %.4lf\n", compute_spmvstats(params, pgraph));
gk_graph_Free(&pgraph);
gk_free((void **)&perm, LTERM);
/* min-label propagation restricted to equal degree buckets */
perm = reorder_minlpn_db(params, graph);
pgraph = gk_graph_Reorder(graph, perm, NULL);
sort_adjacencies(params, pgraph);
if (params->write) gk_graph_Write(pgraph, "minlpn-db.ijv", GK_GRAPH_FMT_IJV, 1);
printf("DBMinLabelPropN SPMV HitRate: %.4lf\n", compute_spmvstats(params, pgraph));
gk_graph_Free(&pgraph);
gk_free((void **)&perm, LTERM);
gk_graph_Free(&graph);
print_final_info(params);
return;
}
/*************************************************************************/
/*! Runs the triangle-counting cache experiments.

    Reads the graph named by params->infile, reorders it with
    reorder_degrees(), and reports the hit rate returned by
    compute_tcstats() when the vertices of that reordered graph are visited
    in four different orders: as stored, and in the orders derived from the
    BFS, freqlpn and freqlpn-db orderings.

    compute_tcstats() rearranges the adjacency lists in place, which is why
    sort_adjacencies() is called again before every further use of pgraph.

    The reordered graph is released before the final memory report;
    it used to be leaked.
*/
/**************************************************************************/
void test_tc(params_t *params)
{
  ssize_t i, v;
  gk_graph_t *graph, *pgraph;
  int32_t *perm, *iperm;

  /* read the data */
  graph = gk_graph_Read(params->infile, GK_GRAPH_FMT_METIS, -1, -1, 0, 0, 0);

  /* display some basic stats */
  print_init_info(params, graph);

  /* all experiments operate on the degree-reordered graph */
  perm = reorder_degrees(params, graph);
  pgraph = gk_graph_Reorder(graph, perm, NULL);
  gk_free((void **)&perm, LTERM);
  sort_adjacencies(params, pgraph);

  /* visit order #1: the identity */
  iperm = gk_i32incset(graph->nvtxs, 0, gk_i32malloc(graph->nvtxs, "iperm"));
  printf("Degrees TC HitRate: %.4lf\n", compute_tcstats(params, pgraph, iperm));

  /* visit order #2: BFS from a random vertex; iperm is the inverse of perm */
  sort_adjacencies(params, pgraph);
  v = RandomInRange(pgraph->nvtxs);
  gk_graph_ComputeBFSOrdering(pgraph, v, &perm, NULL);
  for (i=0; i<graph->nvtxs; i++) iperm[perm[i]] = i;
  gk_free((void **)&perm, LTERM);
  printf("BFS TC HitRate: %.4lf\n", compute_tcstats(params, pgraph, iperm));

  /* visit order #3: most-frequent-label propagation */
  sort_adjacencies(params, pgraph);
  perm = reorder_freqlpn(params, pgraph);
  for (i=0; i<graph->nvtxs; i++) iperm[perm[i]] = i;
  gk_free((void **)&perm, LTERM);
  printf("FreqLabelPropN TC HitRate: %.4lf\n", compute_tcstats(params, pgraph, iperm));

  /* visit order #4: degree-bucketed most-frequent-label propagation */
  sort_adjacencies(params, pgraph);
  perm = reorder_freqlpn_db(params, pgraph);
  for (i=0; i<graph->nvtxs; i++) iperm[perm[i]] = i;
  gk_free((void **)&perm, LTERM);
  printf("DBFreqLabelPropN TC HitRate: %.4lf\n", compute_tcstats(params, pgraph, iperm));

  /* done with the reordered graph; release it before pgraph is reused below
     and before the memory report */
  gk_graph_Free(&pgraph);

#ifdef XXX
  /* disabled experiments, kept as they were */
  perm = reorder_minlpn(params, graph);
  pgraph = gk_graph_Reorder(graph, perm, NULL);
  sort_adjacencies(params, pgraph);
  if (params->write) gk_graph_Write(pgraph, "minlpn.ijv", GK_GRAPH_FMT_IJV, 1);
  printf("MinLabelPropN SPMV HitRate: %.4lf\n", compute_spmvstats(params, pgraph));
  gk_graph_Free(&pgraph);
  gk_free((void **)&perm, LTERM);
  perm = reorder_minlpn_db(params, graph);
  pgraph = gk_graph_Reorder(graph, perm, NULL);
  sort_adjacencies(params, pgraph);
  if (params->write) gk_graph_Write(pgraph, "minlpn-db.ijv", GK_GRAPH_FMT_IJV, 1);
  printf("DBMinLabelPropN SPMV HitRate: %.4lf\n", compute_spmvstats(params, pgraph));
  gk_graph_Free(&pgraph);
  gk_free((void **)&perm, LTERM);
#endif

  gk_free((void **)&iperm, LTERM);
  gk_graph_Free(&graph);

  print_final_info(params);

  return;
}
/*************************************************************************/
/*! Sorts the adjacency list of every vertex with gk_i32sorti().
    Does nothing when params->nosort is set.

    \param params the run options; only nosort is read.
    \param graph the graph whose adjncy array is sorted in place, one
           xadj[v]..xadj[v+1] segment at a time.
*/
/*************************************************************************/
void sort_adjacencies(params_t *params, gk_graph_t *graph)
{
  uint64_t v, nvtxs;
  ssize_t *xadj, begin, end;
  int32_t *adjncy;

  if (!params->nosort) {
    nvtxs  = graph->nvtxs;
    xadj   = graph->xadj;
    adjncy = graph->adjncy;

    for (v=0; v<nvtxs; v++) {
      begin = xadj[v];
      end   = xadj[v+1];
      gk_i32sorti(end-begin, adjncy+begin);
    }
  }

  return;
}
/*************************************************************************/
/*! Measures the cache behaviour of the memory accesses of an SPMV.

    For every adjacency entry it feeds the simulator the address of the
    entry itself and the address of the vec element that the entry indexes.
    vec is never read or written; only the addresses of its elements are
    used.

    \param params the run options; lnbits and cnbits configure the cache.
    \param graph the graph supplying xadj and adjncy.
    \returns the value of gk_cacheGetHitRate() after all accesses.
*/
/*************************************************************************/
double compute_spmvstats(params_t *params, gk_graph_t *graph)
{
  /* NOTE(review): the original comment described this configuration as
     "8MB total; i7 spec" -- presumably for the default lnbits/cnbits */
  gk_cache_t *cache = gk_cacheCreate(16, params->lnbits, params->cnbits);
  uint64_t e, nvtxs = graph->nvtxs;
  ssize_t *xadj = graph->xadj;
  int32_t *adjncy = graph->adjncy;
  int32_t *vec = gk_i32malloc(nvtxs, "vec");
  double hitrate;

  for (e=0; e<xadj[nvtxs]; e++) {
    gk_cacheLoad(cache, (size_t)(adjncy+e));          /* the column index */
    gk_cacheLoad(cache, (size_t)(vec+adjncy[e]));     /* the vector entry it selects */
  }

  gk_free((void **)&vec, LTERM);

  hitrate = gk_cacheGetHitRate(cache);
  gk_cacheDestroy(&cache);

  return hitrate;
}
/*************************************************************************/
/*! The hash-map-based triangle-counting routine that uses the JIK
    triangle enumeration scheme, instrumented so that every array access is
    also reported to GKlib's cache simulator.

    This version implements the following:
    - It does not store location information in L
    - Reverts the order within U's adjacency lists to allow ++ traversal

    \param params the run options; lnbits and cnbits configure the cache.
    \param graph the graph to process. Its adjncy array is MODIFIED: the
           upper-triangular part of every adjacency list is reversed in
           place, so callers must re-sort before reusing the graph.
    \param iperm the order in which the vertices are visited; iperm[k] is
           the k-th vertex processed.
    \returns the value of gk_cacheGetHitRate() after the whole computation.

    The scan for the start of each upper-triangular part assumes sorted
    adjacency lists. The loop conditions of the form
    "gk_cacheLoad(...) && ..." rely on gk_cacheLoad() returning non-zero --
    NOTE(review): confirm against cache.c.
*/
/*************************************************************************/
double compute_tcstats(params_t *params, gk_graph_t *graph, int32_t *iperm)
{
  int32_t vi, vj, vjj, vk, nvtxs;
  ssize_t ei, eiend, ej, ejend;
  int64_t ntriangles;
  ssize_t *xadj, *uxadj;
  int32_t *adjncy;
  int32_t l, hmsize, *hmap;
  gk_cache_t *cache = gk_cacheCreate(16, params->lnbits, params->cnbits);

  nvtxs  = graph->nvtxs;
  xadj   = graph->xadj;
  adjncy = graph->adjncy;

  /* determine the starting location of the upper triangular part */
  uxadj = gk_zmalloc(nvtxs, "uxadj");
  for (vi=0; vi<nvtxs; vi++) {
    for (ei=xadj[vi], eiend=xadj[vi+1]; ei<eiend && adjncy[ei]<vi; ei++);
    uxadj[vi] = ei;

    /* flip the order of Adj(vi)'s upper triangular adjacency list */
    for (ej=xadj[vi+1]-1; ei<ej; ei++, ej--) {
      vj = adjncy[ei];
      adjncy[ei] = adjncy[ej];
      adjncy[ej] = vj;
    }
  }

  /* determine the size of the hash-map and convert it into a format
     that is compatible with a bitwise AND operation */
  for (hmsize=0, vi=0; vi<nvtxs; vi++)
    hmsize = gk_max(hmsize, (int32_t)(xadj[vi+1]-uxadj[vi]));
  for (l=1; hmsize>(1<<l); l++);
  hmsize = (1<<(l+4))-1;
  hmap = gk_i32smalloc(hmsize+1, 0, "hmap");

  for (ntriangles=0, vjj=0; vjj<nvtxs; vjj++) {
    vj = iperm[vjj];

    gk_cacheLoad(cache, (size_t)(&xadj[vj]));
    gk_cacheLoad(cache, (size_t)(&xadj[vj+1]));
    gk_cacheLoad(cache, (size_t)(&uxadj[vj]));

    /* nothing to do if either the upper or the lower part is empty */
    if (xadj[vj+1]-uxadj[vj] == 0 || uxadj[vj] == xadj[vj])
      continue;

    /* hash Adj(vj); a zero entry marks a free slot */
    gk_cacheLoad(cache, (size_t)(&uxadj[vj]));
    gk_cacheLoad(cache, (size_t)(&xadj[vj+1]));
    for (ej=uxadj[vj], ejend=xadj[vj+1]; ej<ejend; ej++) {
      gk_cacheLoad(cache, (size_t)(&adjncy[ej]));
      vk = adjncy[ej];
      for (l=(vk&hmsize);
           gk_cacheLoad(cache, (size_t)(&hmap[l])) && hmap[l]!=0;
           l=((l+1)&hmsize));
      hmap[l] = vk;
    }

    /* find intersections */
    gk_cacheLoad(cache, (size_t)(&xadj[vj]));
    gk_cacheLoad(cache, (size_t)(&uxadj[vj]));
    for (ej=xadj[vj], ejend=uxadj[vj]; ej<ejend; ej++) {
      gk_cacheLoad(cache, (size_t)(&adjncy[ej]));
      vi = adjncy[ej];
      /* simulate the uxadj[vi] access only once vi is known; it used to be
         issued before the assignment, i.e., for a stale index */
      gk_cacheLoad(cache, (size_t)(&uxadj[vi]));
      for (ei=uxadj[vi]; gk_cacheLoad(cache, (size_t)(&adjncy[ei])) && adjncy[ei]>vj; ei++) {
        vk = adjncy[ei];
        for (l=vk&hmsize;
             gk_cacheLoad(cache, (size_t)(&hmap[l])) && hmap[l]!=0 && hmap[l]!=vk;
             l=((l+1)&hmsize));
        gk_cacheLoad(cache, (size_t)(&hmap[l]));
        if (hmap[l] == vk)
          ntriangles++;
      }
    }

    /* reset hash */
    gk_cacheLoad(cache, (size_t)(&uxadj[vj]));
    gk_cacheLoad(cache, (size_t)(&xadj[vj+1]));
    for (ej=uxadj[vj], ejend=xadj[vj+1]; ej<ejend; ej++) {
      gk_cacheLoad(cache, (size_t)(&adjncy[ej]));
      vk = adjncy[ej];
      for (l=(vk&hmsize);
           gk_cacheLoad(cache, (size_t)(&hmap[l])) && hmap[l]!=vk;
           l=((l+1)&hmsize));
      hmap[l] = 0;
    }
  }

  /* ntriangles is a signed 64-bit counter, hence PRId64 */
  printf("& compatible hmsize: %"PRId32" #triangles: %"PRId64"\n", hmsize, ntriangles);

  gk_free((void **)&uxadj, &hmap, LTERM);

  double hitrate = gk_cacheGetHitRate(cache);
  gk_cacheDestroy(&cache);

  return hitrate;
}
/*************************************************************************/
/*! Computes an increasing-degree ordering with a counting sort.

    \param params unused.
    \param graph the graph; only nvtxs and xadj are read.
    \returns a newly allocated array in which perm[v] is the position of
             vertex v when the vertices are arranged by non-decreasing
             degree; vertices of equal degree keep their relative order.
             The caller releases it with gk_free().
*/
/*************************************************************************/
int32_t *reorder_degrees(params_t *params, gk_graph_t *graph)
{
  int v, nvtxs, range;
  ssize_t deg, *xadj;
  int32_t *counts, *perm;

  nvtxs = graph->nvtxs;
  xadj  = graph->xadj;

  /* range = (largest degree) + 1 */
  range = 0;
  for (v=0; v<nvtxs; v++) {
    deg = xadj[v+1]-xadj[v];
    if (deg > range)
      range = deg;
  }
  range++;

  /* histogram of the degrees, turned into starting offsets */
  counts = gk_i32smalloc(range+1, 0, "counts");
  for (v=0; v<nvtxs; v++) {
    deg = xadj[v+1]-xadj[v];
    counts[deg]++;
  }
  MAKECSR(v, range, counts);

  /* hand out the positions in vertex order */
  perm = gk_i32malloc(nvtxs, "perm");
  for (v=0; v<nvtxs; v++) {
    deg = xadj[v+1]-xadj[v];
    perm[v] = counts[deg]++;
  }

  gk_free((void **)&counts, LTERM);

  return perm;
}
/*************************************************************************/
/*! This function re-orders the graph by:
    - performing a fixed number of most-popular label propagation iterations
    - locally renumbering the vertices with the same label

    Every vertex starts with its own id as label. In each of the
    params->niter sweeps the vertices are visited in a random order and each
    one adopts the label that occurs most often among its neighbors, with
    ties broken at random.

    Vertices without neighbors keep their label. Previously such a vertex
    read adjncy[xadj[i]], an entry that belongs to another vertex or lies
    past the end of the array, and left a stale count behind in freq.

    \param params the run options; only niter is read.
    \param graph the graph; nvtxs, xadj and adjncy are read.
    \returns a newly allocated array in which perm[v] is the rank of vertex
             v after sorting the vertices by final label. The caller
             releases it with gk_free().
*/
/*************************************************************************/
int32_t *reorder_freqlpn(params_t *params, gk_graph_t *graph)
{
  int32_t i, ii, k, nvtxs, maxlbl;
  ssize_t j, *xadj;
  int32_t *adjncy, *labels, *freq, *perm;
  gk_i32kv_t *cand;

  nvtxs  = graph->nvtxs;
  xadj   = graph->xadj;
  adjncy = graph->adjncy;

  labels = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "labels"));
  freq   = gk_i32smalloc(nvtxs, 0, "freq");
  perm   = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "perm"));

  for (k=0; k<params->niter; k++) {
    /* perm doubles as the random visit order of this sweep */
    gk_i32randArrayPermuteFine(nvtxs, perm, 0);
    for (ii=0; ii<nvtxs; ii++) {
      i = perm[ii];

      /* an isolated vertex has no neighbor label to adopt */
      if (xadj[i] == xadj[i+1])
        continue;

      /* seed the tally with the first neighbor's label */
      maxlbl = labels[adjncy[xadj[i]]];
      freq[maxlbl] = 1;
      for (j=xadj[i]+1; j<xadj[i+1]; j++) {
        freq[labels[adjncy[j]]]++;
        if (freq[maxlbl] < freq[labels[adjncy[j]]])
          maxlbl = labels[adjncy[j]];
        else if (freq[maxlbl] == freq[labels[adjncy[j]]]) {
          if (RandomInRange(2))
            maxlbl = labels[adjncy[j]];
        }
      }

      /* clear only the touched counters so freq is all-zero again */
      for (j=xadj[i]; j<xadj[i+1]; j++)
        freq[labels[adjncy[j]]] = 0;

      labels[i] = maxlbl;
    }
  }

  /* sort the vertices by label; the rank becomes the new position */
  cand = gk_i32kvmalloc(nvtxs, "cand");
  for (i=0; i<nvtxs; i++) {
    cand[i].key = labels[i];
    cand[i].val = i;
  }
  gk_i32kvsorti(nvtxs, cand);

  for (i=0; i<nvtxs; i++)
    perm[cand[i].val] = i;

  gk_free((void **)&labels, &freq, &cand, LTERM);

  return perm;
}
/*************************************************************************/
/*! This function re-orders the graph by:
    - performing a fixed number of most-popular label propagation iterations
    - restricting that propagation to take place within similar degree
      buckets of vertices (bucket = degree>>3)
    - locally renumbering the vertices with the same label

    \param params the run options; only niter is read.
    \param graph the graph; nvtxs, xadj and adjncy are read.
    \returns a newly allocated array in which perm[v] is the rank of vertex
             v after sorting the vertices by final label. The caller
             releases it with gk_free().
*/
/*************************************************************************/
int32_t *reorder_freqlpn_db(params_t *params, gk_graph_t *graph)
{
  int32_t v, pos, iter, nvtxs, best, nbr, nbrlbl;
  ssize_t e, *xadj;
  int32_t *adjncy, *labels, *freq, *perm, *dbucket;
  gk_i32kv_t *cand;

  nvtxs  = graph->nvtxs;
  xadj   = graph->xadj;
  adjncy = graph->adjncy;

  labels  = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "labels"));
  freq    = gk_i32smalloc(nvtxs, 0, "freq");
  perm    = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "perm"));
  dbucket = gk_i32malloc(nvtxs, "dbucket");

  /* vertices whose degrees agree after dropping the low 3 bits share a bucket */
  for (v=0; v<nvtxs; v++)
    dbucket[v] = ((xadj[v+1]-xadj[v])>>3);

  for (iter=0; iter<params->niter; iter++) {
    /* perm doubles as the random visit order of this sweep */
    gk_i32randArrayPermuteFine(nvtxs, perm, 0);

    for (pos=0; pos<nvtxs; pos++) {
      v    = perm[pos];
      best = labels[v];

      for (e=xadj[v]; e<xadj[v+1]; e++) {
        nbr = adjncy[e];
        if (dbucket[v] == dbucket[nbr]) {
          nbrlbl = labels[nbr];
          freq[nbrlbl]++;
          if (freq[best] < freq[nbrlbl])
            best = nbrlbl;
          else if (freq[best] == freq[nbrlbl]) {
            /* tie: switch to the neighbor's label with a coin flip */
            if (RandomInRange(2))
              best = nbrlbl;
          }
        }
      }

      /* clear the counters of all neighbor labels */
      for (e=xadj[v]; e<xadj[v+1]; e++)
        freq[labels[adjncy[e]]] = 0;

      labels[v] = best;
    }
  }

  /* sort the vertices by label; the rank becomes the new position */
  cand = gk_i32kvmalloc(nvtxs, "cand");
  for (v=0; v<nvtxs; v++) {
    cand[v].val = v;
    cand[v].key = labels[v];
  }
  gk_i32kvsorti(nvtxs, cand);

  for (pos=0; pos<nvtxs; pos++)
    perm[cand[pos].val] = pos;

  gk_free((void **)&labels, &freq, &dbucket, &cand, LTERM);

  return perm;
}
/*************************************************************************/
/*! This function re-orders the graph by:
    - performing a fixed number of min-label propagation iterations
    - locally renumbering the vertices with the same label

    Every vertex starts with its own id as label; in each of the
    params->niter sweeps a vertex takes the smallest label found among
    itself and its neighbors. Labels are updated in place, so a sweep sees
    the values already lowered earlier in the same sweep.

    \param params the run options; only niter is read.
    \param graph the graph; nvtxs, xadj and adjncy are read.
    \returns a newly allocated array in which perm[v] is the rank of vertex
             v after sorting the vertices by final label. The caller
             releases it with gk_free().
*/
/*************************************************************************/
int32_t *reorder_minlpn(params_t *params, gk_graph_t *graph)
{
  int32_t v, rank, iter, nvtxs, smallest, nbrlbl;
  ssize_t e, *xadj;
  int32_t *adjncy, *labels, *perm;
  gk_i32kv_t *cand;

  nvtxs  = graph->nvtxs;
  xadj   = graph->xadj;
  adjncy = graph->adjncy;

  labels = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "labels"));
  perm   = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "perm"));

  for (iter=0; iter<params->niter; iter++) {
    for (v=0; v<nvtxs; v++) {
      smallest = labels[v];
      for (e=xadj[v]; e<xadj[v+1]; e++) {
        nbrlbl = labels[adjncy[e]];
        if (nbrlbl < smallest)
          smallest = nbrlbl;
      }
      labels[v] = smallest;
    }
  }

  /* sort the vertices by label; the rank becomes the new position */
  cand = gk_i32kvmalloc(nvtxs, "cand");
  for (v=0; v<nvtxs; v++) {
    cand[v].val = v;
    cand[v].key = labels[v];
  }
  gk_i32kvsorti(nvtxs, cand);

  for (rank=0; rank<nvtxs; rank++)
    perm[cand[rank].val] = rank;

  gk_free((void **)&labels, &cand, LTERM);

  return perm;
}
/*************************************************************************/
/*! This function re-orders the graph by:
    - performing a fixed number of min-label propagation iterations
    - restricting that propagation to take place within similar degree
      buckets of vertices (bucket = degree>>3)
    - locally renumbering the vertices with the same label

    \param params the run options; only niter is read.
    \param graph the graph; nvtxs, xadj and adjncy are read.
    \returns a newly allocated array in which perm[v] is the rank of vertex
             v after sorting the vertices by final label. The caller
             releases it with gk_free().
*/
/*************************************************************************/
int32_t *reorder_minlpn_db(params_t *params, gk_graph_t *graph)
{
  int32_t v, rank, iter, nvtxs, smallest, nbr;
  ssize_t e, *xadj;
  int32_t *adjncy, *labels, *perm, *dbucket;
  gk_i32kv_t *cand;

  nvtxs  = graph->nvtxs;
  xadj   = graph->xadj;
  adjncy = graph->adjncy;

  labels  = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "labels"));
  perm    = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "perm"));
  dbucket = gk_i32malloc(nvtxs, "dbucket");

  /* vertices whose degrees agree after dropping the low 3 bits share a bucket */
  for (v=0; v<nvtxs; v++)
    dbucket[v] = ((xadj[v+1]-xadj[v])>>3);

  for (iter=0; iter<params->niter; iter++) {
    for (v=0; v<nvtxs; v++) {
      smallest = labels[v];
      for (e=xadj[v]; e<xadj[v+1]; e++) {
        nbr = adjncy[e];
        /* only neighbors of the same degree bucket take part */
        if (dbucket[v] == dbucket[nbr] && smallest > labels[nbr])
          smallest = labels[nbr];
      }
      labels[v] = smallest;
    }
  }

  /* sort the vertices by label; the rank becomes the new position */
  cand = gk_i32kvmalloc(nvtxs, "cand");
  for (v=0; v<nvtxs; v++) {
    cand[v].val = v;
    cand[v].key = labels[v];
  }
  gk_i32kvsorti(nvtxs, cand);

  for (rank=0; rank<nvtxs; rank++)
    perm[cand[rank].val] = rank;

  gk_free((void **)&labels, &dbucket, &cand, LTERM);

  return perm;
}
/*************************************************************************/
/*! Prints the start-up banner, the size of the input graph and the options
    the run will use.

    \param params the parsed command-line options.
    \param graph the input graph; nvtxs and xadj[nvtxs] are printed.
*/
/*************************************************************************/
void print_init_info(params_t *params, gk_graph_t *graph)
{
  fputs("*******************************************************************************\n", stdout);
  fputs(" gkgraph\n\n", stdout);

  fputs("Graph Information ----------------------------------------------------------\n", stdout);
  printf(" input file=%s, [%d, %zd]\n",
      params->infile, graph->nvtxs, graph->xadj[graph->nvtxs]);
  fputs("\n", stdout);

  fputs("Options --------------------------------------------------------------------\n", stdout);
  printf(" lnbits=%d, cnbits=%d, type=%d, niter=%d, lamda=%f, eps=%e\n",
      params->lnbits, params->cnbits, params->type, params->niter,
      params->lamda, params->eps);
  fputs("\n", stdout);

  fputs("Working... -----------------------------------------------------------------\n", stdout);
}
/*************************************************************************/
/*! Prints the closing report: the values returned by gk_GetMaxMemoryUsed()
    and gk_GetCurMemoryUsed().

    \param params unused; kept for symmetry with print_init_info().
*/
/*************************************************************************/
void print_final_info(params_t *params)
{
  ssize_t maxmem, curmem;

  fputs("\n", stdout);
  fputs("Memory Usage Information -----------------------------------------------------\n", stdout);

  maxmem = (ssize_t) gk_GetMaxMemoryUsed();
  printf(" Maximum memory used: %10zd bytes\n", maxmem);

  curmem = (ssize_t) gk_GetCurMemoryUsed();
  printf(" Current memory used: %10zd bytes\n", curmem);

  fputs("********************************************************************************\n", stdout);
}
/*************************************************************************/
/*! Parses the command line of the gkgraph driver.

    Allocates a params_t, fills in the defaults, applies the recognized
    options and stores the positional arguments: the required <graph-file>
    and the optional <out-file> (default "gkgraph.out").

    \param argc the argument count passed to main().
    \param argv the argument vector passed to main().
    \returns the newly allocated, fully initialized parameter structure.

    -help prints the help text and exits with status 0. An illegal option or
    a wrong number of positional arguments prints a message and exits with a
    failure status so that calling scripts can detect the error. A missing
    input file terminates through errexit().

    The positional-argument check used to demand exactly one argument, which
    rejected the documented [<out-file>] and made the code that stores it
    unreachable; one or two arguments are accepted now.
*/
/*************************************************************************/
params_t *parse_cmdline(int argc, char *argv[])
{
  int i, npositional;
  int c, option_index;
  params_t *params;

  params = (params_t *)gk_malloc(sizeof(params_t), "parse_cmdline: params");

  /* initialize the params data structure */
  params->lnbits  = 6;
  params->cnbits  = 13;
  params->type    = 1;
  params->niter   = 1;
  params->eps     = 1e-10;
  params->lamda   = 0.20;
  params->nosort  = 0;
  params->write   = 0;
  params->infile  = NULL;
  params->outfile = NULL;

  /* Parse the command line arguments */
  while ((c = gk_getopt_long_only(argc, argv, "", long_options, &option_index)) != -1) {
    switch (c) {
      case CMD_LNBITS:
        if (gk_optarg) params->lnbits = atoi(gk_optarg);
        break;
      case CMD_CNBITS:
        if (gk_optarg) params->cnbits = atoi(gk_optarg);
        break;
      case CMD_TYPE:
        if (gk_optarg) params->type = atoi(gk_optarg);
        break;
      case CMD_NITER:
        if (gk_optarg) params->niter = atoi(gk_optarg);
        break;
      case CMD_EPS:
        if (gk_optarg) params->eps = atof(gk_optarg);
        break;
      case CMD_LAMDA:
        if (gk_optarg) params->lamda = atof(gk_optarg);
        break;
      case CMD_NOSORT:
        params->nosort = 1;
        break;
      case CMD_WRITE:
        params->write = 1;
        break;
      case CMD_HELP:
        /* the help table is terminated by an empty string */
        for (i=0; strlen(helpstr[i]) > 0; i++)
          printf("%s\n", helpstr[i]);
        exit(0);
        break;
      case '?':
      default:
        printf("Illegal command-line option(s)\nUse %s -help for a summary of the options.\n", argv[0]);
        exit(EXIT_FAILURE);  /* a usage error must not look like success */
    }
  }

  /* <graph-file> is mandatory, <out-file> is optional */
  npositional = argc-gk_optind;
  if (npositional < 1 || npositional > 2) {
    printf("Unrecognized parameters.");
    for (i=0; strlen(shorthelpstr[i]) > 0; i++)
      printf("%s\n", shorthelpstr[i]);
    exit(EXIT_FAILURE);
  }

  params->infile = gk_strdup(argv[gk_optind++]);

  if (argc-gk_optind > 0)
    params->outfile = gk_strdup(argv[gk_optind++]);
  else
    params->outfile = gk_strdup("gkgraph.out");

  if (!gk_fexists(params->infile))
    errexit("input file %s does not exist.\n", params->infile);

  return params;
}

@ -0,0 +1,346 @@
/*!
\file gksort.c
\brief Testing module for the various sorting routines in GKlib
\date Started 4/4/2007
\author George
\version\verbatim $Id: gksort.c 11058 2011-11-10 00:02:50Z karypis $ \endverbatim
*/
#include <GKlib.h>
#define N 10000
/*************************************************************************/
/*! Exercises gk_isorti() and gk_isortd() on N random ints.

    Each routine sorts a freshly generated array; every adjacent pair that
    violates the expected order is reported with a printf. Nothing is
    returned and the run is not aborted on an error.
*/
/*************************************************************************/
void test_isort()
{
  gk_idx_t k;
  int data[N];

  /* increasing order */
  printf("Testing iisort...\n");
  for (k=0; k<N; k++)
    data[k] = RandomInRange(123432);

  gk_isorti(N, data);

  for (k=1; k<N; k++) {
    if (data[k-1] > data[k])
      printf("gk_isorti error at index %jd [%d %d]\n", (intmax_t)(k-1), data[k-1], data[k]);
  }

  /* decreasing order */
  printf("Testing disort...\n");
  for (k=0; k<N; k++)
    data[k] = RandomInRange(123432);

  gk_isortd(N, data);

  for (k=1; k<N; k++) {
    if (data[k-1] < data[k])
      printf("gk_isortd error at index %jd [%d %d]\n", (intmax_t)(k-1), data[k-1], data[k]);
  }
}
/*************************************************************************/
/*! Exercises gk_fsorti() and gk_fsortd() on N random floats.

    Each routine sorts a freshly generated array; every adjacent pair that
    violates the expected order is reported with a printf. Nothing is
    returned and the run is not aborted on an error.
*/
/*************************************************************************/
void test_fsort()
{
  gk_idx_t k;
  float data[N];

  /* increasing order */
  printf("Testing ifsort...\n");
  for (k=0; k<N; k++)
    data[k] = RandomInRange(123432)/(1.0+RandomInRange(645323));

  gk_fsorti(N, data);

  for (k=1; k<N; k++) {
    if (data[k-1] > data[k])
      printf("gk_fsorti error at index %jd [%f %f]\n", (intmax_t)(k-1), data[k-1], data[k]);
  }

  /* decreasing order */
  printf("Testing dfsort...\n");
  for (k=0; k<N; k++)
    data[k] = RandomInRange(123432)/(1.0+RandomInRange(645323));

  gk_fsortd(N, data);

  for (k=1; k<N; k++) {
    if (data[k-1] < data[k])
      printf("gk_fsortd error at index %jd [%f %f]\n", (intmax_t)(k-1), data[k-1], data[k]);
  }
}
/*************************************************************************/
/*! Exercises gk_idxsorti() and gk_idxsortd() on N random gk_idx_t values.

    Each routine sorts a freshly generated array; every adjacent pair that
    violates the expected order is reported with a printf. Nothing is
    returned and the run is not aborted on an error.
*/
/*************************************************************************/
void test_idxsort()
{
  gk_idx_t k;
  gk_idx_t data[N];

  /* increasing order */
  printf("Testing idxsorti...\n");
  for (k=0; k<N; k++)
    data[k] = RandomInRange(123432);

  gk_idxsorti(N, data);

  for (k=1; k<N; k++) {
    if (data[k-1] > data[k])
      printf("gk_idxsorti error at index %zd [%zd %zd]\n", (ssize_t)(k-1), (ssize_t)data[k-1], (ssize_t)data[k]);
  }

  /* decreasing order */
  printf("Testing idxsortd...\n");
  for (k=0; k<N; k++)
    data[k] = RandomInRange(123432);

  gk_idxsortd(N, data);

  for (k=1; k<N; k++) {
    if (data[k-1] < data[k])
      printf("gk_idxsortd error at index %zd [%zd %zd]\n", (ssize_t)(k-1), (ssize_t)data[k-1], (ssize_t)data[k]);
  }
}
/*************************************************************************/
/*! Testing module for gk_?ikvsort() routine */
/*************************************************************************/
void test_ikvsort()
{
gk_idx_t i;
gk_ikv_t array[N];
/* test the increasing sort */
printf("Testing ikvsorti...\n");
for (i=0; i<N; i++) {
array[i].key = RandomInRange(123432);
array[i].val = i;
}
gk_ikvsorti(N, array);
for (i=0; i<N-1; i++) {
if (array[i].key > array[i+1].key)
printf("gk_ikvsorti error at index %jd [%d %d] [%jd %jd]\n", (intmax_t)i, array[i].key, array[i+1].key, (intmax_t)array[i].val, (intmax_t)array[i+1].val);
}
/* test the decreasing sort */
printf("Testing ikvsortd...\n");
for (i=0; i<N; i++) {
array[i].key = RandomInRange(123432);
array[i].val = i;
}
gk_ikvsortd(N, array);
for (i=0; i<N-1; i++) {
if (array[i].key < array[i+1].key)
printf("gk_ikvsortd error at index %jd [%d %d] [%jd %jd]\n", (intmax_t)i, array[i].key, array[i+1].key, (intmax_t)array[i].val, (intmax_t)array[i+1].val);
}
}
/*************************************************************************/
/*! Testing module for gk_?fkvsort() routine */
/*************************************************************************/
void test_fkvsort()
{
  /* Builds N (random float key, original position) records, sorts them by
     key with gk_fkvsorti() and then gk_fkvsortd(), and prints one line for
     every adjacent pair whose keys violate the requested order. */
  gk_fkv_t recs[N];
  gk_idx_t pos;

  /* ascending order */
  printf("Testing fkvsorti...\n");
  for (pos=0; pos<N; pos++) {
    recs[pos].key = RandomInRange(123432)/(1.0+RandomInRange(645323));
    recs[pos].val = pos;
  }

  gk_fkvsorti(N, recs);

  for (pos=1; pos<N; pos++) {
    if (recs[pos-1].key > recs[pos].key)
      printf("gk_fkvsorti error at index %jd [%f %f] [%jd %jd]\n", (intmax_t)(pos-1), recs[pos-1].key, recs[pos].key, (intmax_t)recs[pos-1].val, (intmax_t)recs[pos].val);
  }

  /* descending order, on a fresh set of records */
  printf("Testing fkvsortd...\n");
  for (pos=0; pos<N; pos++) {
    recs[pos].key = RandomInRange(123432)/(1.0+RandomInRange(645323));
    recs[pos].val = pos;
  }

  gk_fkvsortd(N, recs);

  for (pos=1; pos<N; pos++) {
    if (recs[pos-1].key < recs[pos].key)
      printf("gk_fkvsortd error at index %jd [%f %f] [%jd %jd]\n", (intmax_t)(pos-1), recs[pos-1].key, recs[pos].key, (intmax_t)recs[pos-1].val, (intmax_t)recs[pos].val);
  }
}
/*************************************************************************/
/*! Testing module for gk_?dkvsort() routine */
/*************************************************************************/
void test_dkvsort()
{
gk_idx_t i;
gk_dkv_t array[N];
/* test the increasing sort */
printf("Testing dkvsorti...\n");
for (i=0; i<N; i++) {
array[i].key = RandomInRange(123432)/(1.0+RandomInRange(645323));
array[i].val = i;
}
gk_dkvsorti(N, array);
for (i=0; i<N-1; i++) {
if (array[i].key > array[i+1].key)
printf("gk_dkvsorti error at index %jd [%lf %lf] [%jd %jd]\n", (intmax_t)i, array[i].key, array[i+1].key, (intmax_t)array[i].val, (intmax_t)array[i+1].val);
}
/* test the decreasing sort */
printf("Testing dkvsortd...\n");
for (i=0; i<N; i++) {
array[i].key = RandomInRange(123432)/(1.0+RandomInRange(645323));
array[i].val = i;
}
gk_dkvsortd(N, array);
for (i=0; i<N-1; i++) {
if (array[i].key < array[i+1].key)
printf("gk_dkvsortd error at index %jd [%lf %lf] [%jd %jd]\n", (intmax_t)i, array[i].key, array[i+1].key, (intmax_t)array[i].val, (intmax_t)array[i+1].val);
}
}
/*************************************************************************/
/*! Testing module for gk_?skvsort() routine */
/*************************************************************************/
void test_skvsort()
{
gk_idx_t i;
gk_skv_t array[N];
char line[256];
/* test the increasing sort */
printf("Testing skvsorti...\n");
for (i=0; i<N; i++) {
sprintf(line, "%d", RandomInRange(123432));
array[i].key = gk_strdup(line);
array[i].val = i;
}
gk_skvsorti(N, array);
for (i=0; i<N-1; i++) {
if (strcmp(array[i].key, array[i+1].key) > 0)
printf("gk_skvsorti error at index %jd [%s %s] [%jd %jd]\n", (intmax_t)i, array[i].key, array[i+1].key, (intmax_t)array[i].val, (intmax_t)array[i+1].val);
}
/* test the decreasing sort */
printf("Testing skvsortd...\n");
for (i=0; i<N; i++) {
sprintf(line, "%d", RandomInRange(123432));
array[i].key = gk_strdup(line);
array[i].val = i;
}
gk_skvsortd(N, array);
for (i=0; i<N-1; i++) {
/*printf("%s\n", array[i].key);*/
if (strcmp(array[i].key, array[i+1].key) < 0)
printf("gk_skvsortd error at index %jd [%s %s] [%jd %jd]\n", (intmax_t)i, array[i].key, array[i+1].key, (intmax_t)array[i].val, (intmax_t)array[i+1].val);
}
}
/*************************************************************************/
/*! Testing module for gk_?idxkvsort() routine */
/*************************************************************************/
void test_idxkvsort()
{
  /* Builds N (random gk_idx_t key, original position) records, sorts them by
     key with gk_idxkvsorti() and then gk_idxkvsortd(), and prints one line
     for every adjacent pair whose keys violate the requested order. */
  gk_idxkv_t recs[N];
  gk_idx_t pos;

  /* ascending order */
  printf("Testing idxkvsorti...\n");
  for (pos=0; pos<N; pos++) {
    recs[pos].key = RandomInRange(123432);
    recs[pos].val = pos;
  }

  gk_idxkvsorti(N, recs);

  for (pos=1; pos<N; pos++) {
    if (recs[pos-1].key > recs[pos].key)
      printf("gk_idxkvsorti error at index %zd [%zd %zd] [%zd %zd]\n",
          (ssize_t)(pos-1), (ssize_t)recs[pos-1].key, (ssize_t)recs[pos].key,
          (ssize_t)recs[pos-1].val, (ssize_t)recs[pos].val);
  }

  /* descending order, on a fresh set of records */
  printf("Testing idxkvsortd...\n");
  for (pos=0; pos<N; pos++) {
    recs[pos].key = RandomInRange(123432);
    recs[pos].val = pos;
  }

  gk_idxkvsortd(N, recs);

  for (pos=1; pos<N; pos++) {
    if (recs[pos-1].key < recs[pos].key)
      printf("gk_idxkvsortd error at index %zd [%zd %zd] [%zd %zd]\n",
          (ssize_t)(pos-1), (ssize_t)recs[pos-1].key, (ssize_t)recs[pos].key,
          (ssize_t)recs[pos-1].val, (ssize_t)recs[pos].val);
  }
}
int main()
{
test_isort();
test_fsort();
test_idxsort();
test_ikvsort();
test_fkvsort();
test_dkvsort();
test_skvsort();
test_idxkvsort();
}

@ -0,0 +1,268 @@
/*!
\file
\brief A program to test various implementations for unique.
\date 10/8/2020
\author George
*/
#include <GKlib.h>
/*************************************************************************/
/*! Data structures for the code */
/*************************************************************************/
/* Command-line parameters of gkuniq, filled in by parse_cmdline(). */
typedef struct {
ssize_t length, dupfactor; /* base-array length, and how many times that array is replicated; main() allocates length*dupfactor ints */
} params_t;
/*************************************************************************/
/*! Constants */
/*************************************************************************/
#define CMD_HELP 10
/*************************************************************************/
/*! Local variables */
/*************************************************************************/
/* Long-option table handed to gk_getopt_long_only() in parse_cmdline().
   Only "help" is defined; its last field (CMD_HELP) is the code that
   parse_cmdline() switches on. The all-zero row presumably marks the end
   of the table, as in getopt_long -- confirm against gk_getopt.h. */
static struct gk_option long_options[] = {
{"help", 0, 0, CMD_HELP},
{0, 0, 0, 0}
};
/*-------------------------------------------------------------------*/
/* Mini help */
/*-------------------------------------------------------------------*/
/* Help text, one output line per row (each row holds at most 99 characters
   plus the terminator). parse_cmdline() prints rows until it reaches a row
   of length zero, so the final "" is the end marker and must stay last. */
static char helpstr[][100] = {
" ",
"Usage: gkuniq length dupfactor",
" ",
" Required parameters",
" length",
" The length of the base array.",
" ",
" dupfactor",
" The number of times the initial array is replicated.",
" ",
" Optional parameters",
" -help",
" Prints this message.",
""
};
/*************************************************************************/
/*! Function prototypes */
/*************************************************************************/
/* Parses argv into a freshly allocated params_t. */
params_t *parse_cmdline(int argc, char *argv[]);
/* Sort-based unique: sorts input in place and writes the distinct values to output. */
int unique_v1(int n, int *input, int *output);
/* Hash-table based unique that allocates and frees its own table on every call. */
int unique_v2(int n, int *input, int *output);
/* Hash-table based unique that keeps its table in *r_hmap (capacity *r_maxsize) across calls. */
int unique_v3(int n, int *input, int *output, int *r_maxsize, int **r_hmap);
/* Flushes the cache lines covering allocation_size bytes starting at p. */
void mem_flush(const void *p, unsigned int allocation_size);
/*************************************************************************/
/*! A function to flush the cache associated with an array */
/**************************************************************************/
/* Issues a clflush for every 64-byte step of the allocation_size bytes that
   start at p, then a single sfence. Returns immediately when p is NULL or
   allocation_size is 0. When NO_X86 is defined the whole body is compiled
   out and the function does nothing. */
void mem_flush(const void *p, unsigned int allocation_size)
{
#ifndef NO_X86
const size_t cache_line = 64; /* distance between flushed addresses; assumes 64-byte cache lines */
const char *cp = (const char *)p;
size_t i = 0;
if (p == NULL || allocation_size <= 0) /* allocation_size is unsigned, so this is an == 0 test */
return;
/* flush the line containing each address p, p+64, p+128, ... */
for (i = 0; i < allocation_size; i += cache_line) {
__asm__ volatile("clflush (%0)\n\t"
:
: "r"(&cp[i])
: "memory");
}
/* fence issued once, after all the flushes */
__asm__ volatile("sfence\n\t"
:
:
: "memory");
#endif
}
/*************************************************************************/
/*! the entry point */
/**************************************************************************/
/* Benchmarks the three unique implementations on the same data set.

   The data consists of params->length random values, each repeated
   params->dupfactor times, for a total of n = length*dupfactor ints.
   Each implementation is timed with a wall-clock timer (the cache flush of
   the input/output arrays is inside the timed region) and the number of
   distinct values plus the elapsed time are printed.

   unique_v3 is run twice: the first call ("V3c") has to allocate its hash
   table, the second ("V3w") reuses the table left in hmap by the first.

   Note that unique_v1 sorts input in place, so unique_v2 and unique_v3 see
   the sorted array rather than the original ordering.

   Returns EXIT_SUCCESS. */
int main(int argc, char *argv[])
{
  int i, j, k;
  params_t *params;
  double tmr;
  int n, nunique, *input, *output;
  int maxsize=0, *hmap=NULL;

  params = parse_cmdline(argc, argv);

  /* create the input data */
  n = params->length*params->dupfactor;
  input = gk_imalloc(n, "input");
  output = gk_imalloc(n, "output");

  /* copy j of value i is stored at j*length+i */
  for (i=0; i<params->length; i++) {
    k = RandomInRange(n);
    for (j=0; j<params->dupfactor; j++)
      input[j*params->length+i] = k;
  }

  /* sort-based version */
  gk_clearwctimer(tmr);
  gk_startwctimer(tmr);
  mem_flush(input, n*sizeof(int));
  mem_flush(output, n*sizeof(int));
  nunique = unique_v1(n, input, output);
  gk_stopwctimer(tmr);
  printf(" V1: nunique: %d, timer: %.5lf\n", nunique, gk_getwctimer(tmr));

  /* hash-based version with a private table */
  gk_clearwctimer(tmr);
  gk_startwctimer(tmr);
  mem_flush(input, n*sizeof(int));
  mem_flush(output, n*sizeof(int));
  nunique = unique_v2(n, input, output);
  gk_stopwctimer(tmr);
  printf(" V2: nunique: %d, timer: %.5lf\n", nunique, gk_getwctimer(tmr));

  /* hash-based version, first call: the table has to be allocated */
  gk_clearwctimer(tmr);
  gk_startwctimer(tmr);
  mem_flush(input, n*sizeof(int));
  mem_flush(output, n*sizeof(int));
  nunique = unique_v3(n, input, output, &maxsize, &hmap);
  gk_stopwctimer(tmr);
  printf("V3c: nunique: %d, timer: %.5lf\n", nunique, gk_getwctimer(tmr));

  /* hash-based version, second call: the table from the first call is reused */
  gk_clearwctimer(tmr);
  gk_startwctimer(tmr);
  mem_flush(input, n*sizeof(int));
  mem_flush(output, n*sizeof(int));
  nunique = unique_v3(n, input, output, &maxsize, &hmap);
  gk_stopwctimer(tmr);
  printf("V3w: nunique: %d, timer: %.5lf\n", nunique, gk_getwctimer(tmr));

  /* params comes from gk_malloc() in parse_cmdline() and is released here too */
  gk_free((void **)&input, &output, &hmap, &params, LTERM);

  return EXIT_SUCCESS;
}
/*************************************************************************/
/*! This is the entry point of the command-line argument parser */
/*************************************************************************/
/* Parses the gkuniq command line.

   Recognised option: -help (prints helpstr and exits). Any other option
   prints a diagnostic and exits. Exactly two positional arguments are
   required, in this order: length and dupfactor. Both are converted with
   atoi() and must be positive; a non-numeric argument converts to 0 and is
   therefore rejected as well.

   Returns a params_t allocated with gk_malloc(); the caller owns it.
   Does not return when the command line is invalid. */
params_t *parse_cmdline(int argc, char *argv[])
{
  int i;
  int c, option_index;
  params_t *params;

  params = (params_t *)gk_malloc(sizeof(params_t), "parse_cmdline: params");

  /* Parse the command line arguments */
  while ((c = gk_getopt_long_only(argc, argv, "", long_options, &option_index)) != -1) {
    switch (c) {
      case CMD_HELP:
        for (i=0; strlen(helpstr[i]) > 0; i++)
          printf("%s\n", helpstr[i]);
        exit(0);
        break;
      case '?':
      default:
        printf("Illegal command-line option(s)\nUse %s -help for a summary of the options.\n", argv[0]);
        exit(0);
    }
  }

  /* exactly two positional arguments must remain */
  if (argc-gk_optind != 2) {
    printf("Unrecognized parameters.");
    for (i=0; strlen(helpstr[i]) > 0; i++)
      printf("%s\n", helpstr[i]);
    exit(0);
  }

  params->length = atoi(argv[gk_optind++]);
  params->dupfactor = atoi(argv[gk_optind++]);

  /* main() allocates length*dupfactor elements and unique_v1() touches
     element 0 unconditionally, so neither value may be zero or negative */
  if (params->length <= 0 || params->dupfactor <= 0)
    errexit("length and dupfactor must be positive integers.\n");

  return params;
}
/*************************************************************************/
/*! gklib-sort based approach */
/*************************************************************************/
/* Sort-based unique.

   Sorts the n entries of input in place (ascending, via gk_isorti) and
   copies the first element of every run of equal values to output.

   input  - array of n ints; reordered by the call.
   output - array with room for n ints; receives the distinct values in
            ascending order.

   Returns the number of distinct values written to output; 0 when n <= 0,
   in which case neither array is accessed. */
int unique_v1(int n, int *input, int *output)
{
  int i, j;

  /* an empty input has no element 0 to seed the output with */
  if (n <= 0)
    return 0;

  gk_isorti(n, input);

  output[0] = input[0];
  for (j=0, i=1; i<n; i++) {
    /* output[j] is the last distinct value emitted so far */
    if (output[j] != input[i])
      output[++j] = input[i];
  }

  return j+1;
}
/*************************************************************************/
/*! hash-table based approach */
/*************************************************************************/
/* Hash-based unique using open addressing with linear probing.

   A table whose size is the smallest power of two >= 2*n (at least 1) is
   allocated with every slot set to -1, which serves as the "empty" marker.
   Each input value is probed starting at (value & mask); the first time a
   value is seen it is stored in the table and appended to output, so output
   keeps the values in order of first appearance. The table is freed before
   returning.

   Returns the number of values written to output. */
int unique_v2(int n, int *input, int *output)
{
  int pos, slot, key, found, tsize, tmask;
  int *table;

  /* smallest power of two that is >= 2*n */
  tsize = 1;
  while (tsize < 2*n)
    tsize *= 2;
  tmask = tsize-1;

  table = gk_ismalloc(tsize, -1, "hmap");

  found = 0;
  for (pos=0; pos<n; pos++) {
    key = input[pos];

    /* walk forward (with wrap-around) to the key's slot or to an empty one */
    slot = key&tmask;
    while (table[slot] != -1 && table[slot] != key)
      slot = (slot+1)&tmask;

    if (table[slot] == -1) {
      table[slot] = key;
      output[found++] = key;
    }
  }

  gk_free((void **)&table, LTERM);

  return found;
}
/*************************************************************************/
/*! hash-table based approach, where the htable is most likely pre-allocated */
/*************************************************************************/
/* Hash-based unique (open addressing, linear probing) whose table persists
   across calls.

   The required table size is the smallest power of two >= 2*n (at least 1).
   If that exceeds *r_maxsize, the table in *r_hmap is freed, a new one of
   the required size is allocated with every slot set to -1, and *r_maxsize
   is updated. Otherwise the existing table is reused after its first
   `size` slots are reset to -1. The table is left in *r_hmap for the caller.

   Values are appended to output the first time they are seen.

   Returns the number of values written to output. */
int unique_v3(int n, int *input, int *output, int *r_maxsize, int **r_hmap)
{
  int pos, slot, key, found, tsize, tmask;
  int *table;

  /* smallest power of two that is >= 2*n */
  tsize = 1;
  while (tsize < 2*n)
    tsize *= 2;
  tmask = tsize-1;

  if (tsize <= *r_maxsize) {
    /* the caller's table is big enough: clear only the part that is used */
    table = *r_hmap;
    gk_iset(tsize, -1, table);
  }
  else {
    /* grow: replace the caller's table with a larger, pre-cleared one */
    gk_free((void **)r_hmap, LTERM);
    *r_hmap = gk_ismalloc(tsize, -1, "hmap");
    table = *r_hmap;
    *r_maxsize = tsize;
  }

  found = 0;
  for (pos=0; pos<n; pos++) {
    key = input[pos];

    /* walk forward (with wrap-around) to the key's slot or to an empty one */
    slot = key&tmask;
    while (table[slot] != -1 && table[slot] != key)
      slot = (slot+1)&tmask;

    if (table[slot] == -1) {
      table[slot] = key;
      output[found++] = key;
    }
  }

  return found;
}

@ -0,0 +1,256 @@
/*!
\file
\brief A simple program to create multiple copies of an input matrix.
\date 5/30/2013
\author George
\version \verbatim $Id: grKx.c 17699 2014-09-27 18:05:31Z karypis $ \endverbatim
*/
#include <GKlib.h>
/*************************************************************************/
/*! Data structures for the code */
/*************************************************************************/
/* Run-time options of grKx, filled in by parse_cmdline(). */
typedef struct {
int inf, outf; /* format codes of the input and output files (the numeric values listed in helpstr) */
int numbering; /* set to 1 by -numone; passed to gk_csr_Read() as its last argument */
int readvals; /* cleared by -noreadvals; passed to gk_csr_Read() */
int writevals; /* cleared by -nowritevals; passed to gk_csr_Write() */
int rshuf, cshuf; /* random shuffle of rows/columns */
int symmetric; /* a symmetric shuffle */
int ncopies; /* the copies of the graph to create */
char *infile; /* input file */
char *outfile; /* output file */
} params_t;
/*************************************************************************/
/*! Constants */
/*************************************************************************/
/* Codes stored in the rows of long_options and matched by the switch in parse_cmdline(). */
#define CMD_NUMONE 1
#define CMD_NOREADVALS 2
#define CMD_NOWRITEVALS 3
#define CMD_RSHUF 4
#define CMD_CSHUF 5
#define CMD_SYMMETRIC 6
#define CMD_HELP 100
/*************************************************************************/
/*! Local variables */
/*************************************************************************/
/* Long-option table handed to gk_getopt_long_only() in parse_cmdline().
   The last field of each row is the CMD_* code that parse_cmdline()
   switches on. None of the options is given a value in the switch, which
   suggests the second field (0) means "no argument" -- confirm against
   gk_getopt.h. The all-zero row presumably marks the end of the table. */
static struct gk_option long_options[] = {
{"numone", 0, 0, CMD_NUMONE},
{"noreadvals", 0, 0, CMD_NOREADVALS},
{"nowritevals", 0, 0, CMD_NOWRITEVALS},
{"rshuf", 0, 0, CMD_RSHUF},
{"cshuf", 0, 0, CMD_CSHUF},
{"symmetric", 0, 0, CMD_SYMMETRIC},
{"help", 0, 0, CMD_HELP},
{0, 0, 0, 0}
};
/*-------------------------------------------------------------------*/
/* Mini help */
/*-------------------------------------------------------------------*/
/* Full help text, one output line per row (each row holds at most 99
   characters plus the terminator). The -help handler prints rows until it
   reaches a row of length zero, so the final "" is the end marker and must
   stay last. */
static char helpstr[][100] = {
  " ",
  "Usage: grKx [options] <infile> <inf> <outfile> <outf> <ncopies>",
  " ",
  " Required parameters",
  " infile, outfile",
  " The name of the input/output CSR file.",
  " ",
  " inf/outf",
  " The format of the input/output file.",
  " Supported values are:",
  " 1 GK_CSR_FMT_CLUTO",
  " 2 GK_CSR_FMT_CSR",
  " 3 GK_CSR_FMT_METIS",
  " 4 GK_CSR_FMT_BINROW",
  " 6 GK_CSR_FMT_IJV",
  " 7 GK_CSR_FMT_BIJV",
  " ",
  " ncopies",
  " The number of copies of the input matrix to create.",
  " ",
  " Optional parameters",
  " -numone",
  " Specifies that the numbering of the input file starts from 1. ",
  " It only applies to CSR/IJV formats.",
  " ",
  " -nowritevals",
  " Specifies that no values will be output.",
  " ",
  " -noreadvals",
  " Specifies that the values will not be read when applicable.",
  " ",
  " -rshuf",
  " Specifies that the rows will be randomly shuffled prior to output.",
  " ",
  " -cshuf",
  " Specifies that the columns will be randomly shuffled prior to output.",
  " ",
  " -symmetric",
  " Specifies that the row+column shuffling will be symmetric.",
  " ",
  " -help",
  " Prints this message.",
  ""
};
/* Short usage text printed when the number of positional arguments is
   wrong. Rows are printed until the first row of length zero, so the final
   "" is the end marker. The hint names this program (grKx), not csrconv. */
static char shorthelpstr[][100] = {
  " ",
  " Usage: grKx [options] <infile> <inf> <outfile> <outf> <ncopies>",
  " use 'grKx -help' for a summary of the options.",
  ""
};
/*************************************************************************/
/*! This is the entry point of the command-line argument parser */
/*************************************************************************/
/* Parses the grKx command line.

   Options (all without a value): -numone, -noreadvals, -nowritevals,
   -rshuf, -cshuf, -symmetric, -help. -help prints helpstr and exits; an
   unknown option prints a diagnostic and exits.

   Exactly five positional arguments are required, in this order:
   infile, inf, outfile, outf, ncopies. The numeric ones are converted with
   atoi(). ncopies must be at least 1 and infile must exist; otherwise the
   program terminates through errexit().

   Returns a params_t allocated with gk_malloc(); infile and outfile are
   gk_strdup() copies. The caller owns all three allocations. */
params_t *parse_cmdline(int argc, char *argv[])
{
  int i;
  int c, option_index;
  params_t *params;

  params = (params_t *)gk_malloc(sizeof(params_t), "parse_cmdline: params");

  /* initialize the params data structure */
  params->numbering = 0;
  params->readvals = 1;
  params->writevals = 1;
  params->rshuf = 0;
  params->cshuf = 0;
  params->symmetric = 0;
  params->inf = -1;
  params->outf = -1;
  params->ncopies = 1;
  params->infile = NULL;
  params->outfile = NULL;

  /* Parse the command line arguments */
  while ((c = gk_getopt_long_only(argc, argv, "", long_options, &option_index)) != -1) {
    switch (c) {
      case CMD_NUMONE:
        params->numbering = 1;
        break;
      case CMD_NOREADVALS:
        params->readvals = 0;
        break;
      case CMD_NOWRITEVALS:
        params->writevals = 0;
        break;
      case CMD_RSHUF:
        params->rshuf = 1;
        break;
      case CMD_CSHUF:
        params->cshuf = 1;
        break;
      case CMD_SYMMETRIC:
        params->symmetric = 1;
        break;
      case CMD_HELP:
        for (i=0; strlen(helpstr[i]) > 0; i++)
          printf("%s\n", helpstr[i]);
        exit(0);
        break;
      case '?':
      default:
        printf("Illegal command-line option(s)\nUse %s -help for a summary of the options.\n", argv[0]);
        exit(0);
    }
  }

  /* exactly five positional arguments must remain */
  if (argc-gk_optind != 5) {
    printf("Unrecognized parameters.");
    for (i=0; strlen(shorthelpstr[i]) > 0; i++)
      printf("%s\n", shorthelpstr[i]);
    exit(0);
  }

  params->infile = gk_strdup(argv[gk_optind++]);
  params->inf = atoi(argv[gk_optind++]);
  params->outfile = gk_strdup(argv[gk_optind++]);
  params->outf = atoi(argv[gk_optind++]);
  params->ncopies = atoi(argv[gk_optind++]);

  /* main() sizes every output array as a multiple of ncopies, so a zero,
     negative or non-numeric value cannot produce a meaningful matrix */
  if (params->ncopies < 1)
    errexit("ncopies must be at least 1 (got %d).\n", params->ncopies);

  if (!gk_fexists(params->infile))
    errexit("input file %s does not exist.\n", params->infile);

  return params;
}
/*************************************************************************/
/*! the entry point */
/**************************************************************************/
int main(int argc, char *argv[])
{
ssize_t i, j, k, knnz, nrows, ncols, ncopies;
int what;
params_t *params;
gk_csr_t *mat, *kmat, *smat;
/* get command-line options */
params = parse_cmdline(argc, argv);
/* read the data */
mat = gk_csr_Read(params->infile, params->inf, params->readvals, params->numbering);
/* create the copies */
ncopies = params->ncopies;
nrows = mat->nrows;
ncols = mat->ncols;
knnz = mat->rowptr[nrows]*ncopies;
kmat = gk_csr_Create();
kmat->nrows = nrows*ncopies;
kmat->ncols = ncols*ncopies;
kmat->rowptr = gk_zmalloc(kmat->nrows+1, "rowptr");
kmat->rowind = gk_imalloc(knnz, "rowind");
if (mat->rowval)
kmat->rowval = gk_fmalloc(knnz, "rowval");
kmat->rowptr[0] = knnz = 0;
for (k=0; k<ncopies; k++) {
for (i=0; i<nrows; i++) {
for (j=mat->rowptr[i]; j<mat->rowptr[i+1]; j++, knnz++) {
kmat->rowind[knnz] = mat->rowind[j] + k*ncols;
if (mat->rowval)
kmat->rowval[knnz] = mat->rowval[j];
}
kmat->rowptr[k*nrows+i+1] = knnz;
}
}
gk_csr_Free(&mat);
mat = kmat;
if (params->rshuf || params->cshuf) {
if (params->rshuf && params->cshuf)
what = GK_CSR_ROWCOL;
else if (params->rshuf)
what = GK_CSR_ROW;
else
what = GK_CSR_COL;
smat = gk_csr_Shuffle(mat, what, params->symmetric);
gk_csr_Free(&mat);
mat = smat;
}
if (params->writevals && mat->rowval == NULL)
mat->rowval = gk_fsmalloc(mat->rowptr[mat->nrows], 1.0, "mat->rowval");
gk_csr_Write(mat, params->outfile, params->outf, params->writevals, 0);
gk_csr_Free(&mat);
}

@ -0,0 +1,304 @@
/*!
\file
\brief It takes as input two CSR matrices and finds for each row of the
first matrix the most similar rows in the second matrix.
\date 9/27/2014
\author George
\version \verbatim $Id: m2mnbrs.c 17699 2014-09-27 18:05:31Z karypis $ \endverbatim
*/
#include <GKlib.h>
/*************************************************************************/
/*! Data structures for the code */
/*************************************************************************/
/* Run-time options and timers of m2mnbrs; the options are filled in by parse_cmdline(). */
typedef struct {
int simtype; /*!< The similarity type to use */
int nnbrs; /*!< The maximum number of nearest neighbors to output */
float minsim; /*!< The minimum similarity to use for keeping neighbors */
int verbosity; /*!< The reporting verbosity level */
char *qfile; /*!< The file storing the query documents */
char *cfile; /*!< The file storing the collection documents */
char *outfile; /*!< The file where the output will be stored */
/* timers: main() clears and prints all five; timer_global brackets the
   FindNeighbors() call and timer_1 brackets its query loop. No use of
   timer_2..timer_4 beyond clearing/printing is visible in this file section. */
double timer_global;
double timer_1;
double timer_2;
double timer_3;
double timer_4;
} params_t;
/*************************************************************************/
/*! Constants */
/*************************************************************************/
/* Versions */
#define VER_MAJOR 0
#define VER_MINOR 1
#define VER_SUBMINOR 0
/* Command-line option codes */
#define CMD_SIMTYPE 10
#define CMD_NNBRS 20
#define CMD_MINSIM 22
#define CMD_VERBOSITY 70
#define CMD_HELP 100
/* The text labels for the different simtypes. main() indexes this table
   directly with params->simtype, so the row order presumably mirrors the
   numeric values of the GK_CSR_* similarity constants -- confirm against
   gk_defs.h before adding or reordering entries. */
static char simtypenames[][10] = {"", "dotp", "cos", "jac", ""};
/*************************************************************************/
/*! Local variables */
/*************************************************************************/
/* Long-option table handed to gk_getopt_long_only() in parse_cmdline().
   The last field of each row is the CMD_* code that parse_cmdline()
   switches on. The rows with a second field of 1 are the ones whose
   handlers read gk_optarg, so 1 presumably means "takes an argument" --
   confirm against gk_getopt.h. The all-zero row presumably ends the table. */
static struct gk_option long_options[] = {
{"simtype", 1, 0, CMD_SIMTYPE},
{"nnbrs", 1, 0, CMD_NNBRS},
{"minsim", 1, 0, CMD_MINSIM},
{"verbosity", 1, 0, CMD_VERBOSITY},
{"help", 0, 0, CMD_HELP},
{0, 0, 0, 0}
};
/* Maps the text accepted by -simtype to the matching GK_CSR_* constant.
   parse_cmdline() looks the option value up with gk_GetStringID() and
   treats a result of -1 as "not found". The {NULL, 0} row presumably marks
   the end of the map. */
static gk_StringMap_t simtype_options[] = {
{"cos", GK_CSR_COS},
{"jac", GK_CSR_JAC},
{NULL, 0}
};
/*-------------------------------------------------------------------
* Mini help
*-------------------------------------------------------------------*/
/* Help text, one output line per row (each row holds at most 99 characters
   plus the terminator). The -help handler prints rows until it reaches a
   row of length zero, so the final "" is the end marker and must stay last. */
static char helpstr[][100] =
{
  " ",
  "Usage: m2mnbrs [options] qfile cfile [outfile]",
  " ",
  " Options",
  " -simtype=string",
  " Specifies the type of similarity to use. Possible values are:",
  " cos - Cosine similarity",
  " jac - Jaccard similarity [default]",
  " ",
  " -nnbrs=int",
  " Specifies the maximum number of nearest neighbors.",
  " A value of -1 indicates that all neighbors will be considered.",
  " Default value is 100.",
  " ",
  " -minsim=float",
  " The minimum allowed similarity between neighbors. ",
  " Default value is .25.",
  " ",
  " -verbosity=int",
  " Specifies the level of debugging information to be displayed.",
  " Default value is 0.",
  " ",
  " -help",
  " Prints this message.",
  ""
};
/*************************************************************************/
/*! Function prototypes */
/*************************************************************************/
/* Parses argv into a freshly allocated params_t. */
params_t *parse_cmdline(int argc, char *argv[]);
/* For every row of qmat, finds its most similar rows in cmat and optionally writes them to params->outfile. */
void FindNeighbors(params_t *params, gk_csr_t *qmat, gk_csr_t *cmat);
/*************************************************************************/
/*! This is the entry point of the command-line argument parser */
/*************************************************************************/
/* Parses the m2mnbrs command line.

   Options: -simtype=string, -nnbrs=int, -minsim=float, -verbosity=int and
   -help. -help prints helpstr and exits successfully; an unknown option or
   an unknown simtype terminates the program with a diagnostic.

   Two positional arguments are required (qfile, then cfile) and a third,
   outfile, is optional. Both input files must exist.

   Returns a params_t allocated with gk_malloc(); the file names are
   gk_strdup() copies and outfile is NULL when it was not given. The caller
   owns the allocations. The timer fields are not initialised here. */
params_t *parse_cmdline(int argc, char *argv[])
{
  int i;
  int c, option_index;
  params_t *params;

  params = (params_t *)gk_malloc(sizeof(params_t), "parse_cmdline: params");

  /* initialize the params data structure */
  params->simtype = GK_CSR_JAC;
  params->nnbrs = 100;
  params->minsim = .25;
  params->verbosity = -1;
  params->qfile = NULL;
  params->cfile = NULL;
  params->outfile = NULL;

  /* Parse the command line arguments */
  while ((c = gk_getopt_long_only(argc, argv, "", long_options, &option_index)) != -1) {
    switch (c) {
      case CMD_SIMTYPE:
        if (gk_optarg) {
          if ((params->simtype = gk_GetStringID(simtype_options, gk_optarg)) == -1)
            errexit("Invalid simtype of %s.\n", gk_optarg);
        }
        break;
      case CMD_NNBRS:
        if (gk_optarg) params->nnbrs = atoi(gk_optarg);
        break;
      case CMD_MINSIM:
        if (gk_optarg) params->minsim = atof(gk_optarg);
        break;
      case CMD_VERBOSITY:
        if (gk_optarg) params->verbosity = atoi(gk_optarg);
        break;
      case CMD_HELP:
        for (i=0; strlen(helpstr[i]) > 0; i++)
          printf("%s\n", helpstr[i]);
        exit(EXIT_SUCCESS);
        break;
      case '?':
      default:
        printf("Illegal command-line option(s)\nUse %s -help for a summary of the options.\n", argv[0]);
        exit(EXIT_FAILURE);
    }
  }

  /* Get the input/output file info. Both qfile and cfile are consumed
     below, so at least two positional arguments must be present; with only
     one, argv[gk_optind] for cfile would be the NULL that terminates argv. */
  if (argc-gk_optind < 2) {
    printf("Missing input/output file info.\n Use %s -help for a summary of the options.\n", argv[0]);
    exit(EXIT_FAILURE);
  }

  params->qfile = gk_strdup(argv[gk_optind++]);
  params->cfile = gk_strdup(argv[gk_optind++]);
  params->outfile = (gk_optind < argc ? gk_strdup(argv[gk_optind++]) : NULL);

  if (!gk_fexists(params->qfile))
    errexit("input file %s does not exist.\n", params->qfile);
  if (!gk_fexists(params->cfile))
    errexit("input file %s does not exist.\n", params->cfile);

  return params;
}
/*************************************************************************/
/*! This is the entry point of the program */
/**************************************************************************/
/* Program driver: parses the options, loads the query and collection
   matrices (both in GK_CSR_FMT_CSR format), prints a banner with the run
   parameters and matrix sizes, runs FindNeighbors() under the global
   wall-clock timer, prints all timers, frees the matrices and exits with
   EXIT_SUCCESS. */
int main(int argc, char *argv[])
{
  int status = EXIT_SUCCESS;
  params_t *opts;
  gk_csr_t *queries, *collection;

  opts = parse_cmdline(argc, argv);

  queries    = gk_csr_Read(opts->qfile, GK_CSR_FMT_CSR, 1, 0);
  collection = gk_csr_Read(opts->cfile, GK_CSR_FMT_CSR, 1, 0);

  /* banner */
  printf("********************************************************************************\n");
  printf("sd (%d.%d.%d) Copyright 2014, GK.\n", VER_MAJOR, VER_MINOR, VER_SUBMINOR);
  printf(" simtype=%s, nnbrs=%d, minsim=%.2f\n",
      simtypenames[opts->simtype], opts->nnbrs, opts->minsim);
  printf(" qfile=%s, nrows=%d, ncols=%d, nnz=%zd\n",
      opts->qfile, queries->nrows, queries->ncols, queries->rowptr[queries->nrows]);
  printf(" cfile=%s, nrows=%d, ncols=%d, nnz=%zd\n",
      opts->cfile, collection->nrows, collection->ncols, collection->rowptr[collection->nrows]);

  /* reset every timer before the measured section */
  gk_clearwctimer(opts->timer_global);
  gk_clearwctimer(opts->timer_1);
  gk_clearwctimer(opts->timer_2);
  gk_clearwctimer(opts->timer_3);
  gk_clearwctimer(opts->timer_4);

  /* measured section */
  gk_startwctimer(opts->timer_global);
  FindNeighbors(opts, queries, collection);
  gk_stopwctimer(opts->timer_global);

  /* timing report */
  printf(" wclock: %.2lfs\n", gk_getwctimer(opts->timer_global));
  printf(" timer1: %.2lfs\n", gk_getwctimer(opts->timer_1));
  printf(" timer2: %.2lfs\n", gk_getwctimer(opts->timer_2));
  printf(" timer3: %.2lfs\n", gk_getwctimer(opts->timer_3));
  printf(" timer4: %.2lfs\n", gk_getwctimer(opts->timer_4));
  printf("********************************************************************************\n");

  gk_csr_Free(&queries);
  gk_csr_Free(&collection);

  exit(status);
}
/*************************************************************************/
/*! Reads and computes the neighbors of each query document against the
collection of documents */
/**************************************************************************/
/* Finds, for every row of qmat, its most similar rows in cmat.

   Requires qmat->ncols <= cmat->ncols (checked with GKASSERT). For cosine
   similarity the rows of both matrices are normalised first. cmat then gets
   a column index and its squared row norms computed. Each query row is
   passed to gk_csr_GetSimilarRows() together with params->simtype,
   params->nnbrs and params->minsim; when params->outfile is set, every hit
   is written to it as "<query row> <hit val> <hit key>". The query loop is
   timed with params->timer_1.

   Both matrices are modified by the call (normalisation, index, norms). */
void FindNeighbors(params_t *params, gk_csr_t *qmat, gk_csr_t *cmat)
{
  int qrow, h, nfound;
  int32_t *marker;
  gk_fkv_t *hits, *cand;
  FILE *fpout = NULL;

  GKASSERT(qmat->ncols <= cmat->ncols);

  /* if cosine, make rows unit length */
  if (params->simtype == GK_CSR_COS) {
    gk_csr_Normalize(qmat, GK_CSR_ROW, 2);
    gk_csr_Normalize(cmat, GK_CSR_ROW, 2);
  }

  /* inverted index and squared row norms of the collection */
  gk_csr_CreateIndex(cmat, GK_CSR_COL);
  gk_csr_ComputeSquaredNorms(cmat, GK_CSR_ROW);

  /* the output file is optional */
  if (params->outfile)
    fpout = gk_fopen(params->outfile, "w", "FindNeighbors: fpout");

  /* working arrays, one entry per collection row */
  hits   = gk_fkvmalloc(cmat->nrows, "FindNeighbors: hits");
  marker = gk_i32smalloc(cmat->nrows, -1, "FindNeighbors: marker");
  cand   = gk_fkvmalloc(cmat->nrows, "FindNeighbors: cand");

  /* process the queries one row at a time */
  gk_startwctimer(params->timer_1);
  for (qrow=0; qrow<qmat->nrows; qrow++) {
    if (params->verbosity > 0)
      printf("Working on query %7d\n", qrow);

    /* the query is described by its length, column indices and values */
    nfound = gk_csr_GetSimilarRows(cmat,
                 qmat->rowptr[qrow+1]-qmat->rowptr[qrow],
                 qmat->rowind+qmat->rowptr[qrow],
                 qmat->rowval+qmat->rowptr[qrow],
                 params->simtype, params->nnbrs, params->minsim,
                 hits, marker, cand);

    /* nothing to record without an output file */
    if (fpout == NULL)
      continue;

    for (h=0; h<nfound; h++)
      fprintf(fpout, "%8d %8zd %.3f\n", qrow, hits[h].val, hits[h].key);
  }
  gk_stopwctimer(params->timer_1);

  /* cleanup */
  if (fpout != NULL)
    gk_fclose(fpout);

  gk_free((void **)&hits, &marker, &cand, LTERM);
}

@ -0,0 +1,306 @@
/*!
\file
\brief A simple (personalized) random walk program to test GKlib's routines
\date 6/12/2008
\author George
\version \verbatim $Id$ \endverbatim
*/
#include <GKlib.h>
/*************************************************************************/
/*! Data structures for the code */
/*************************************************************************/
/* Run-time options of the rw program, returned by parse_cmdline(). */
typedef struct {
int niter; /* maximum number of iterations; passed to gk_rw_PageRank() */
int ntvs; /* number of random restart vectors to run; -1 selects another mode in main() */
int ppr; /* 1-based source vertex of the personalized run (main() sets pr[ppr-1]); -1 selects the standard run */
float eps; /* error tolerance; passed to gk_rw_PageRank() */
float lamda; /* follow-the-adjacent-links probability (per helpstr); passed to gk_rw_PageRank() */
char *infile; /* graph file, read in GK_CSR_FMT_METIS format */
char *outfile; /* file that receives the computed scores */
} params_t;
/*************************************************************************/
/*! Constants */
/*************************************************************************/
/* Codes stored in the rows of long_options, one per command-line option. */
#define CMD_NITER 1
#define CMD_EPS 2
#define CMD_LAMDA 3
#define CMD_PPR 4
#define CMD_NTVS 5
#define CMD_HELP 10
/*************************************************************************/
/*! Local variables */
/*************************************************************************/
/* Long-option table of the rw program; the last field of each row is the
   CMD_* code of the option. NOTE(review): the second field is presumably
   the "takes an argument" flag (1 for the =value options, 0 for help) and
   the all-zero row the end marker, as in getopt_long -- confirm against
   gk_getopt.h. */
static struct gk_option long_options[] = {
{"niter", 1, 0, CMD_NITER},
{"lamda", 1, 0, CMD_LAMDA},
{"eps", 1, 0, CMD_EPS},
{"ppr", 1, 0, CMD_PPR},
{"ntvs", 1, 0, CMD_NTVS},
{"help", 0, 0, CMD_HELP},
{0, 0, 0, 0}
};
/*-------------------------------------------------------------------*/
/* Mini help */
/*-------------------------------------------------------------------*/
static char helpstr[][100] = {
" ",
"Usage: rw [options] <graph-file> <out-file>",
" ",
" Required parameters",
" graph-file",
" The name of the file storing the transactions. The file is in ",
" Metis' graph format.",
" ",
" Optional parameters",
" -niter=int",
" Specifies the maximum number of iterations. [default: 100]",
" ",
" -lamda=float",
" Specifies the follow-the-adjacent-links probability. [default: 0.80]",
" ",
" -eps=float",
" Specifies the error tollerance. [default: 1e-10]",
" ",
" -ppr=int",
" Specifies the source of the personalized PR. [default: -1]",
" ",
" -ntvs=int",
" Specifies the number of test-vectors to compute. [default: -1]",
" ",
" -help",
" Prints this message.",
""
};
/* Short usage text (usage line plus a pointer to 'rw -help'). Like helpstr,
   it ends with an empty row that serves as the end-of-text marker. */
static char shorthelpstr[][100] = {
" ",
" Usage: rw [options] <graph-file> <out-file>",
" use 'rw -help' for a summary of the options.",
""
};
/*************************************************************************/
/*! Function prototypes */
/*************************************************************************/
/* Prints the input-matrix summary and the run options before the computation. */
void print_init_info(params_t *params, gk_csr_t *mat);
/* Called by main() after the computation and after the matrix has been freed. */
void print_final_info(params_t *params);
/* Parses argv and returns the run parameters. */
params_t *parse_cmdline(int argc, char *argv[]);
/*************************************************************************/
/*! The entry point of the rw program.

    Parses the command line, reads the input graph, and then runs
    gk_rw_PageRank() in one of three modes:
      - -ntvs given: one run per randomly generated restart vector; the
        scores are written one vertex per line, one column per vector.
      - -ppr given: a single personalized run whose restart vector is 1.0
        at the requested (1-based) vertex and 0.0 everywhere else.
      - otherwise: a standard run starting from the uniform vector; each
        output line holds the vertex score, its row length, the largest
        absolute score difference to a neighbor, and the 1-based number
        of that neighbor.

    \param argc is the number of command-line arguments.
    \param argv is the array of command-line arguments.
    \returns 0 when the run completes.
*/
/**************************************************************************/
int main(int argc, char *argv[])
{
  ssize_t i, j, niter;
  params_t *params;
  gk_csr_t *mat;
  FILE *fpout;

  /* get command-line options */
  params = parse_cmdline(argc, argv);

  /* read the data */
  mat = gk_csr_Read(params->infile, GK_CSR_FMT_METIS, 1, 1);

  /* -ppr is a 1-based vertex number that is used below as pr[ppr-1];
     anything outside [1, nrows] would write outside the pr array */
  if (params->ppr != -1 && (params->ppr < 1 || params->ppr > mat->nrows))
    errexit("The -ppr vertex %d is outside the valid range [1, %d].\n",
        params->ppr, mat->nrows);

  /* display some basic stats */
  print_init_info(params, mat);

  if (params->ntvs != -1) {
    /* compute the pr for different randomly generated restart-distribution vectors */
    float **prs;

    prs = gk_fAllocMatrix(params->ntvs, mat->nrows, 0.0, "main: prs");

    /* generate the random restart vectors */
    for (j=0; j<params->ntvs; j++) {
      for (i=0; i<mat->nrows; i++)
        prs[j][i] = RandomInRange(931);

      /* scale the vector so that its entries sum to one */
      gk_fscale(mat->nrows, 1.0/gk_fsum(mat->nrows, prs[j], 1), prs[j], 1);

      niter = gk_rw_PageRank(mat, params->lamda, params->eps, params->niter, prs[j]);
      printf("tvs#: %zd; niters: %zd\n", j, niter);
    }

    /* output the computed pr scores */
    fpout = gk_fopen(params->outfile, "w", "main: outfile");
    for (i=0; i<mat->nrows; i++) {
      for (j=0; j<params->ntvs; j++)
        fprintf(fpout, "%.4e ", prs[j][i]);
      fprintf(fpout, "\n");
    }
    gk_fclose(fpout);

    gk_fFreeMatrix(&prs, params->ntvs, mat->nrows);
  }
  else if (params->ppr != -1) {
    /* compute the personalized pr from the specified vertex */
    float *pr;

    pr = gk_fsmalloc(mat->nrows, 0.0, "main: pr");
    pr[params->ppr-1] = 1.0;  /* in range: validated right after the read */

    niter = gk_rw_PageRank(mat, params->lamda, params->eps, params->niter, pr);
    printf("ppr: %d; niters: %zd\n", params->ppr, niter);

    /* output the computed pr scores */
    fpout = gk_fopen(params->outfile, "w", "main: outfile");
    for (i=0; i<mat->nrows; i++)
      fprintf(fpout, "%.4e\n", pr[i]);
    gk_fclose(fpout);

    gk_free((void **)&pr, LTERM);
  }
  else {
    /* compute the standard pr */
    int jmax;
    float diff, maxdiff;
    float *pr;

    pr = gk_fsmalloc(mat->nrows, 1.0/mat->nrows, "main: pr");

    niter = gk_rw_PageRank(mat, params->lamda, params->eps, params->niter, pr);
    printf("pr; niters: %zd\n", niter);

    /* output the computed pr scores */
    fpout = gk_fopen(params->outfile, "w", "main: outfile");
    for (i=0; i<mat->nrows; i++) {
      /* find the neighbor whose score differs the most from that of i;
         jmax stays at i when the row is empty or no score differs */
      for (jmax=i, maxdiff=0.0, j=mat->rowptr[i]; j<mat->rowptr[i+1]; j++) {
        if ((diff = fabs(pr[i]-pr[mat->rowind[j]])) > maxdiff) {
          maxdiff = diff;
          jmax = mat->rowind[j];
        }
      }
      fprintf(fpout, "%.4e %10zd %.4e %10d\n", pr[i],
          mat->rowptr[i+1]-mat->rowptr[i], maxdiff, jmax+1);
    }
    gk_fclose(fpout);

    gk_free((void **)&pr, LTERM);
  }

  gk_csr_Free(&mat);

  /* display some final stats */
  print_final_info(params);

  return 0;
}
/*************************************************************************/
/*! This function prints the run parameters.

    It writes to stdout a banner with the program name, the name of the
    input file together with the matrix's nrows, ncols, and
    rowptr[nrows] values, and the niter/ntvs/ppr/lamda/eps options.

    \param params holds the parsed command-line options.
    \param mat is the matrix that was read from params->infile.
*/
/*************************************************************************/
void print_init_info(params_t *params, gk_csr_t *mat)
{
  printf("*******************************************************************************\n");
  /* the banner names this program (rw); it used to say "fis" */
  printf(" rw\n\n");
  printf("Matrix Information ---------------------------------------------------------\n");
  printf(" input file=%s, [%d, %d, %zd]\n",
      params->infile, mat->nrows, mat->ncols, mat->rowptr[mat->nrows]);
  printf("\n");
  printf("Options --------------------------------------------------------------------\n");
  printf(" niter=%d, ntvs=%d, ppr=%d, lamda=%f, eps=%e\n",
      params->niter, params->ntvs, params->ppr, params->lamda, params->eps);
  printf("\n");
  printf("Performing random walks... ----------------------------------------------\n");
}
/*************************************************************************/
/*! Reports the memory statistics at the end of the run.

    Writes to stdout the values returned by gk_GetMaxMemoryUsed() and
    gk_GetCurMemoryUsed(), followed by the closing banner line.

    \param params is not used by this function.
*/
/*************************************************************************/
void print_final_info(params_t *params)
{
  ssize_t peak_bytes, live_bytes;

  peak_bytes = (ssize_t) gk_GetMaxMemoryUsed();
  live_bytes = (ssize_t) gk_GetCurMemoryUsed();

  printf("\n");
  printf("Memory Usage Information -----------------------------------------------------\n");
  printf(" Maximum memory used: %10zd bytes\n", peak_bytes);
  printf(" Current memory used: %10zd bytes\n", live_bytes);
  printf("********************************************************************************\n");
}
/*************************************************************************/
/*! This is the entry point of the command-line argument parser.

    Allocates a params_t, fills it with the defaults (niter=100, ppr=-1,
    ntvs=-1, eps=1e-10, lamda=0.80), overrides them with whatever options
    are present, and stores copies of the two positional arguments as the
    input and output file names.

    The process is terminated with a failure status when an illegal option
    is given or when the number of positional arguments is not two, and
    with a success status after -help has printed the help text. errexit()
    is called when the input file does not exist or when both -ppr and
    -ntvs are specified.

    \param argc is the number of command-line arguments.
    \param argv is the array of command-line arguments.
    \returns the newly allocated and populated params_t.
*/
/*************************************************************************/
params_t *parse_cmdline(int argc, char *argv[])
{
  int i;
  int c, option_index;
  params_t *params;

  params = (params_t *)gk_malloc(sizeof(params_t), "parse_cmdline: params");

  /* initialize the params data structure */
  params->niter = 100;
  params->ppr = -1;
  params->ntvs = -1;
  params->eps = 1e-10;
  params->lamda = 0.80;
  params->infile = NULL;
  params->outfile = NULL;

  /* Parse the command line arguments */
  while ((c = gk_getopt_long_only(argc, argv, "", long_options, &option_index)) != -1) {
    switch (c) {
      case CMD_NITER:
        if (gk_optarg) params->niter = atoi(gk_optarg);
        break;
      case CMD_NTVS:
        if (gk_optarg) params->ntvs = atoi(gk_optarg);
        break;
      case CMD_PPR:
        if (gk_optarg) params->ppr = atoi(gk_optarg);
        break;
      case CMD_EPS:
        if (gk_optarg) params->eps = atof(gk_optarg);
        break;
      case CMD_LAMDA:
        if (gk_optarg) params->lamda = atof(gk_optarg);
        break;

      case CMD_HELP:
        for (i=0; strlen(helpstr[i]) > 0; i++)
          printf("%s\n", helpstr[i]);
        /* help was asked for explicitly, so this is a successful exit */
        exit(0);

      case '?':
      default:
        printf("Illegal command-line option(s)\nUse %s -help for a summary of the options.\n", argv[0]);
        /* a usage error must not be reported to the caller as success */
        exit(EXIT_FAILURE);
    }
  }

  /* exactly two positional arguments are expected: <graph-file> <out-file> */
  if (argc-gk_optind != 2) {
    printf("Unrecognized parameters.");
    for (i=0; strlen(shorthelpstr[i]) > 0; i++)
      printf("%s\n", shorthelpstr[i]);
    exit(EXIT_FAILURE);
  }

  params->infile = gk_strdup(argv[gk_optind++]);
  params->outfile = gk_strdup(argv[gk_optind++]);

  if (!gk_fexists(params->infile))
    errexit("input file %s does not exist.\n", params->infile);

  if (params->ppr != -1 && params->ntvs != -1)
    errexit("Only one of the -ppr and -ntvs options can be specified.\n");

  return params;
}

@ -0,0 +1,98 @@
/*!
\file
\brief A simple program to convert a tensor in coordinate format into an unfolded
matrix
\author George
*/
#include <GKlib.h>
/*! Converts a 3-mode tensor stored in coordinate format into an unfolded
    sparse matrix and writes that matrix to stdout.

    Every input record is "k i j value" with 1-based indices. The record is
    mapped to row k*nI+i (0-based k and i) and column j of an
    (nK*nI) x nJ matrix. The matrix is first assembled row-wise, then
    re-arranged column-wise, and finally printed as a "nrows ncols nnz"
    header followed, for each column, by its entry count and one
    "row value" line per entry.

    \param argc must be 2.
    \param argv argv[1] is the name of the input file.
    \returns 0 on success; errexit() is called on bad input.
*/
int main(int argc, char *argv[])
{
  size_t nnz, i, j, nI, nJ, nK, nrows, ncols;
  int32_t *I, *J, *K, *rowind, *colind;
  ssize_t *rowptr, *colptr;
  float *V, *rowval, *colval;
  FILE *fpin;

  if (argc != 2)
    errexit("Usage %s <infile> [%d]\n", argv[0], argc);

  if (!gk_fexists(argv[1]))
    errexit("File %s does not exist.\n", argv[1]);

  /* NOTE(review): the first output of gk_getfilestats() is presumably the
     number of lines in the file; it is used as the number of records */
  gk_getfilestats(argv[1], &nnz, NULL, NULL, NULL);

  I = gk_i32malloc(nnz, "I");
  J = gk_i32malloc(nnz, "J");
  K = gk_i32malloc(nnz, "K");
  V = gk_fmalloc(nnz, "V");

  fprintf(stderr, "Input nnz: %zu\n", nnz);

  fpin = gk_fopen(argv[1], "r", "infile");
  for (i=0; i<nnz; i++) {
    if (4 != fscanf(fpin, "%d %d %d %f", K+i, I+i, J+i, V+i))
      errexit("Failed to read 4 values in line %zu\n", i);

    /* the indices are 1-based; anything smaller would turn negative below
       and then be used as an array offset */
    if (K[i] < 1 || I[i] < 1 || J[i] < 1)
      errexit("Indices smaller than 1 in line %zu\n", i);

    K[i]--; I[i]--; J[i]--;
  }
  gk_fclose(fpin);

  nI = gk_i32max(nnz, I, 1)+1;
  nJ = gk_i32max(nnz, J, 1)+1;
  nK = gk_i32max(nnz, K, 1)+1;

  fprintf(stderr, "nI: %zu, nJ: %zu, nK: %zu\n", nI, nJ, nK);

  nrows = nK*nI;
  ncols = nJ;

  /* build the row-wise structure: count the entries of each row, ... */
  rowptr = gk_zsmalloc(nrows+1, 0, "rowptr");
  for (i=0; i<nnz; i++)
    rowptr[K[i]*nI+I[i]]++;
  /* ... presumably MAKECSR turns the counts into start offsets, ... */
  MAKECSR(i, nrows, rowptr);

  /* ... and scatter the entries, advancing each row's offset as we go */
  rowind = gk_i32malloc(nnz, "rowind");
  rowval = gk_fmalloc(nnz, "rowval");
  for (i=0; i<nnz; i++) {
    rowind[rowptr[K[i]*nI+I[i]]] = J[i];
    rowval[rowptr[K[i]*nI+I[i]]] = V[i];
    rowptr[K[i]*nI+I[i]]++;
  }
  /* presumably SHIFTCSR undoes the advancing done by the scatter loop */
  SHIFTCSR(i, nrows, rowptr);

  gk_free((void **)&I, &J, &K, &V, LTERM);

  /* build the column-wise structure from the row-wise one the same way */
  colptr = gk_zsmalloc(ncols+1, 0, "colptr");
  colind = gk_i32malloc(nnz, "colind");
  colval = gk_fmalloc(nnz, "colval");
  for (i=0; i<nrows; i++) {
    for (j=rowptr[i]; j<rowptr[i+1]; j++)
      colptr[rowind[j]]++;
  }
  MAKECSR(i, ncols, colptr);

  for (i=0; i<nrows; i++) {
    for (j=rowptr[i]; j<rowptr[i+1]; j++) {
      colind[colptr[rowind[j]]] = (int32_t)i;
      colval[colptr[rowind[j]]] = rowval[j];
      colptr[rowind[j]]++;
    }
  }
  SHIFTCSR(i, ncols, colptr);

  /* sanity check: report adjacent entries of a column with the same row */
  for (i=0; i<ncols; i++) {
    for (j=colptr[i]+1; j<colptr[i+1]; j++) {
      if (colind[j-1] == colind[j])
        fprintf(stderr, "Duplicate row indices: %d %d %d\n", (int)i, colind[j], colind[j-1]);
    }
  }

  /* write the matrix column by column */
  printf("%zu %zu %zu\n", nrows, ncols, nnz);
  for (i=0; i<ncols; i++) {
    printf("%zd\n", colptr[i+1]-colptr[i]);
    for (j=colptr[i]; j<colptr[i+1]; j++)
      printf("%d %.3f\n", colind[j], colval[j]);
  }

  gk_free((void **)&rowptr, &rowind, &rowval, &colptr, &colind, &colval, LTERM);

  return 0;
}

@ -0,0 +1,82 @@
/*!
\file strings.c
\brief Testing module for the string functions in GKlib
\date Started 3/5/2007
\author George
\version\verbatim $Id: strings.c 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
*/
#include <GKlib.h>
/*************************************************************************/
/*! Testing module for gk_strstr_replace().

    Runs a fixed list of (string, pattern, replacement, options) cases
    through gk_strstr_replace() and prints, for each one, the return code
    and the resulting string before freeing that string.
*/
/*************************************************************************/
void test_strstr_replace()
{
  /* one row per case: input string, pattern, replacement, options */
  static char *cases[][4] = {
    {"This is a simple string", "s", "S", ""},
    {"This is a simple string", "s", "S", "g"},
    {"This is a simple SS & ss string", "s", "T", "g"},
    {"This is a simple SS & ss string", "s", "T", "ig"},
    {"This is a simple SS & ss string", "\\b\\w(\\w+)\\w\\b", "$1", "ig"},
    {"This is a simple SS & ss string", "\\b\\w+\\b", "word", "ig"},
    {"http://www.cs.umn.edu/This-is-something-T12323?pp=20&page=4",
     "(http://www\\.cs\\.umn\\.edu/)(.*)-T(\\d+)", "$1$2-P$3", "g"},
    {"http://www.cs.umn.edu/This-is-something-T12323?pp=20&page=4",
     "(\\d+)", "number:$1", "ig"},
    {"http://www.cs.umn.edu/This-is-something-T12323?pp=20&page=4",
     "(http://www\\.cs\\.umn\\.edu/)", "[$1]", "g"}
  };
  size_t t, ncases = sizeof(cases)/sizeof(cases[0]);
  char *new_str;
  int rc;

  for (t=0; t<ncases; t++) {
    rc = gk_strstr_replace(cases[t][0], cases[t][1], cases[t][2], cases[t][3], &new_str);
    printf("%d, %s.\n", rc, new_str);
    gk_free((void **)&new_str, LTERM);
  }
}
/*! Runs the string-function tests. */
int main()
{
  test_strstr_replace();

  /* Disabled spot-check of RandomInRange():
  {
    int i;
    for (i=0; i<1000; i++)
      printf("%d\n", RandomInRange(3));
  }
  */

  return 0;
}

@ -0,0 +1,52 @@
/*!
\file timers.c
\brief Various timing functions
\date Started 4/12/2007
\author George
\version\verbatim $Id: timers.c 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
*/
#include <GKlib.h>
/*************************************************************************
* This function returns the current wall-clock time in seconds.
* With a GNU-compatible compiler the value comes from gettimeofday() and
* includes the microseconds as a fraction; otherwise it is the whole
* number of seconds returned by time().
**************************************************************************/
double gk_WClockSeconds(void)
{
#if defined(__GNUC__)
  struct timeval now;

  gettimeofday(&now, NULL);

  return (double)now.tv_sec + (double)1.0e-6*now.tv_usec;
#else
  return (double)time(NULL);
#endif
}
/*************************************************************************
* This function returns the CPU seconds used by the process.
* On WIN32/MinGW the value is clock()/CLOCKS_PER_SEC; elsewhere it is the
* sum of the user and system times reported by getrusage(RUSAGE_SELF).
* The OpenMP branch is keyed on __OPENMPXXXX__ (the __OPENMP__ spelling
* was commented out in the original), so omp_get_wtime() is used only if
* that macro is defined.
**************************************************************************/
double gk_CPUSeconds(void)
{
#if defined(__OPENMPXXXX__)
  return omp_get_wtime();
#elif defined(WIN32) || defined(__MINGW32__)
  return((double) clock()/CLOCKS_PER_SEC);
#else
  struct rusage usage;

  getrusage(RUSAGE_SELF, &usage);

  return ((usage.ru_utime.tv_sec + usage.ru_stime.tv_sec) +
          1.0e-6*(usage.ru_utime.tv_usec + usage.ru_stime.tv_usec));
#endif
}

@ -0,0 +1,77 @@
/*!
\file tokenizer.c
\brief String tokenization routines
This file contains various routines for splitting an input string into
tokens and returning them in form of a list. The goal is to mimic perl's
split function.
\date Started 11/23/04
\author George
\version\verbatim $Id: tokenizer.c 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
*/
#include <GKlib.h>
/************************************************************************
* Splits a string into tokens separated by any of the characters in the
* delimiters list.
*
* The input string is duplicated into tokens->strbuf and only that copy
* is modified. tokens->ntoks receives the number of tokens and
* tokens->list an array of ntoks pointers into tokens->strbuf. Runs of
* consecutive delimiters separate tokens without producing empty ones.
*************************************************************************/
void gk_strtokenize(char *str, char *delim, gk_Tokens_t *tokens)
{
  int pos, count, len;
  char *buf;

  tokens->strbuf = gk_strdup(str);
  len = strlen(str);
  buf = tokens->strbuf;

  /* First pass: count the tokens */
  count = 0;
  pos = 0;
  while (pos < len) {
    /* skip over a run of delimiters */
    for (; pos<len && strchr(delim, buf[pos]); pos++)
      ;

    if (pos == len)
      break;

    count++;

    /* skip over the token itself */
    for (; pos<len && !strchr(delim, buf[pos]); pos++)
      ;
  }

  tokens->ntoks = count;
  tokens->list = (char **)gk_malloc(count*sizeof(char *), "strtokenize: tokens->list");

  /* Second pass: overwrite the delimiters with '\0' so that each token is
     its own terminated string, and record where each token starts */
  count = 0;
  pos = 0;
  while (pos < len) {
    for (; pos<len && strchr(delim, buf[pos]); pos++)
      buf[pos] = '\0';

    if (pos == len)
      break;

    tokens->list[count] = buf+pos;
    count++;

    for (; pos<len && !strchr(delim, buf[pos]); pos++)
      ;
  }
}
/************************************************************************
* Releases the memory held by a gk_Tokens_t: the array of token pointers
* (tokens->list) and the string buffer they point into (tokens->strbuf).
*************************************************************************/
void gk_freetokenslist(gk_Tokens_t *tokens)
{
  gk_free((void **)&tokens->list, &tokens->strbuf, LTERM);
}

@ -0,0 +1,11 @@
/*!
\file win32/adapt.c
\brief Implementation of Win32 adaptation of libc functions
*/
#include "adapt.h"
/* Win32 stand-in for getpid(): returns the value of GetCurrentProcessId(). */
pid_t getpid(void)
{
  pid_t pid = GetCurrentProcessId();

  return pid;
}

@ -0,0 +1,14 @@
/*!
\file win32/adapt.h
\brief Declaration of Win32 adaptation of POSIX functions and types
*/
/* include guard */
#ifndef _WIN32_ADAPT_H_
#define _WIN32_ADAPT_H_
#include <windows.h>
/* process-id type; a DWORD, which is what win32/adapt.c obtains from
   GetCurrentProcessId() */
typedef DWORD pid_t;
/* Win32 replacement for getpid(); implemented in win32/adapt.c */
pid_t getpid(void);
#endif /* _WIN32_ADAPT_H_ */
Loading…
Cancel
Save