commit f0eba24f897981c6cefb439be840440bbc217c3b
Author: lib <noreply@acem.ece.illinois.edu>
Date:   Thu Jun 22 15:57:42 2023 -0500

    Initial commit

diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..9cd1b4b
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,31 @@
+cmake_minimum_required(VERSION 2.8)
+project(GKlib C)
+
+option(BUILD_SHARED_LIBS "Build shared libraries (.dll/.so) instead of static ones (.lib/.a)" OFF)
+
+# GKlibSystem.cmake locates the sources and headers through GKLIB_PATH.
+get_filename_component(GKLIB_PATH "." ABSOLUTE)
+include(GKlibSystem.cmake)
+
+include_directories(".")
+# On MSVC also use the win32/ directory: add it to the include path and compile its *.c files.
+if(MSVC)
+  include_directories("win32")
+  file(GLOB win32_sources "${GKLIB_PATH}/win32/*.c")
+else()
+  set(win32_sources "")
+endif()
+
+add_library(GKlib ${GKlib_sources} ${win32_sources})
+
+if(UNIX)
+  target_link_libraries(GKlib m)
+endif()
+
+include_directories("test")
+add_subdirectory("test")
+
+install(TARGETS GKlib
+  ARCHIVE DESTINATION lib/${LINSTALL_PATH}
+  LIBRARY DESTINATION lib/${LINSTALL_PATH})
+install(FILES ${GKlib_includes} DESTINATION include/${HINSTALL_PATH})
diff --git a/DEPENDENCIES b/DEPENDENCIES
new file mode 100644
index 0000000..e69de29
diff --git a/GKlib.h b/GKlib.h
new file mode 100644
index 0000000..9278fe4
--- /dev/null
+++ b/GKlib.h
@@ -0,0 +1,85 @@
+/*
+ * GKlib.h
+ * 
+ * George's library of most frequently used routines
+ *
+ * $Id: GKlib.h 14866 2013-08-03 16:40:04Z karypis $
+ *
+ */
+
+#ifndef _GKLIB_H_       /* include guard, closed by the final #endif of this header */
+#define _GKLIB_H_ 1
+
+#define GKMSPACE        /* NOTE(review): defined unconditionally; its consumer is not visible here */
+
+#if defined(_MSC_VER)   /* compiler-provided macro -> project-level __MSC__ */
+#define __MSC__
+#endif
+#if defined(__ICC)      /* compiler-provided macro -> project-level __ICC__ */
+#define __ICC__
+#endif
+
+
+#include "gk_arch.h" /*!< This should be here, prior to the includes */
+
+
+/*************************************************************************
+* Header file inclusion section
+**************************************************************************/
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <memory.h>
+#include <errno.h>
+#include <ctype.h>
+#include <math.h>
+#include <float.h>
+#include <time.h>
+#include <string.h>
+#include <limits.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <assert.h>
+#include <sys/stat.h>
+
+#if defined(__WITHPCRE__)
+  #include <pcreposix.h>
+#else
+  #if defined(USE_GKREGEX)
+    #include "gkregex.h"
+  #else
+    #include <regex.h>
+  #endif /* defined(USE_GKREGEX) */
+#endif /* defined(__WITHPCRE__) */
+
+
+
+#if defined(__OPENMP__) 
+#include <omp.h>
+#endif
+
+
+
+
+#include <gk_types.h>
+#include <gk_struct.h>
+#include <gk_externs.h>
+#include <gk_defs.h>
+#include <gk_macros.h>
+#include <gk_getopt.h>
+
+#include <gk_mksort.h>
+#include <gk_mkblas.h>
+#include <gk_mkmemory.h>
+#include <gk_mkpqueue.h>
+#include <gk_mkpqueue2.h>
+#include <gk_mkrandom.h>
+#include <gk_mkutils.h>
+
+#include <gk_proto.h>
+
+
+#endif  /* GKlib.h */
+
+
diff --git a/GKlibSystem.cmake b/GKlibSystem.cmake
new file mode 100644
index 0000000..31a1cf1
--- /dev/null
+++ b/GKlibSystem.cmake
@@ -0,0 +1,152 @@
+# Helper modules.
+include(CheckFunctionExists)
+include(CheckIncludeFile)
+
+# User-settable build switches (all default to OFF); each one is tested further down in this file.
+option(GDB "enable use of GDB" OFF)
+option(ASSERT "turn asserts on" OFF)
+option(ASSERT2 "additional assertions" OFF)
+option(DEBUG "add debugging support" OFF)
+option(GPROF "add gprof support" OFF)
+option(VALGRIND "add valgrind support" OFF)
+option(OPENMP "enable OpenMP support" OFF)
+option(PCRE "enable PCRE support" OFF)
+option(GKREGEX "enable GKREGEX support" OFF)
+option(GKRAND "enable GKRAND support" OFF)
+option(NO_X86 "enable NO_X86 support" OFF)
+
+
+# Platform definitions; defines belong in GKlib_COPTIONS because GKlib_COPTS can be overwritten further down.
+if(MSVC)
+  set(GKlib_COPTS "/Ox")
+  set(GKlib_COPTIONS "-DWIN32 -DMSC -D_CRT_SECURE_NO_DEPRECATE -DUSE_GKREGEX")
+elseif(MINGW)
+  set(GKlib_COPTIONS "-DUSE_GKREGEX")
+else()
+  set(GKlib_COPTIONS "-DLINUX -D_FILE_OFFSET_BITS=64")
+endif()
+if(CYGWIN)
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -DCYGWIN")
+endif()
+if(CMAKE_COMPILER_IS_GNUCC)
+  # GCC opts.
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -std=c99 -fno-strict-aliasing")
+  if(VALGRIND)
+    set(GKlib_COPTIONS "${GKlib_COPTIONS} -march=x86-64 -mtune=generic")
+  else()
+    # -march=native is not a valid flag on PPC:
+    if(CMAKE_SYSTEM_PROCESSOR MATCHES "power|ppc|powerpc|ppc64|powerpc64" OR (APPLE AND CMAKE_OSX_ARCHITECTURES MATCHES "ppc|ppc64"))
+      set(GKlib_COPTIONS "${GKlib_COPTIONS} -mtune=native")
+    else()
+      set(GKlib_COPTIONS "${GKlib_COPTIONS} -march=native")
+    endif()
+  endif()
+  if(NOT MINGW)
+    set(GKlib_COPTIONS "${GKlib_COPTIONS} -fPIC")
+  endif()
+  # GCC warnings.
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -Werror -Wall -pedantic -Wno-unused-function -Wno-unused-but-set-variable -Wno-unused-variable -Wno-unknown-pragmas -Wno-unused-label")
+elseif(CMAKE_C_COMPILER_ID MATCHES "Sun")
+  # Sun insists on -xc99.
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -xc99")
+endif()
+
+# Intel compiler (the variable name is left unexpanded so an empty compiler ID cannot break the if()).
+if(CMAKE_C_COMPILER_ID MATCHES "Intel")
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -xHost -std=c99")
+endif()
+
+# When OpenMP is requested, probe for it and enable it only if the probe succeeds.
+if(OPENMP)
+  include(FindOpenMP)
+  if(NOT OPENMP_FOUND)
+    message(WARNING "OpenMP was requested but support was not found")
+  else()
+    set(GKlib_COPTIONS "${GKlib_COPTIONS} -D__OPENMP__ ${OpenMP_C_FLAGS}")
+  endif()
+endif()
+
+# Pass the value of the NO_X86 option on to the sources as the NO_X86 define.
+if(NO_X86)
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -DNO_X86=${NO_X86}")
+endif()
+
+# Optimisation/debug switches (GKlib_COPTS) and the matching definitions.
+if(GDB)
+  set(GKlib_COPTS "${GKlib_COPTS} -g")
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -Werror")
+elseif(NOT MSVC)
+  # -O3 is a GCC-style switch; with MSVC keep the optimisation flag selected earlier.
+  set(GKlib_COPTS "-O3")
+endif()
+
+if(DEBUG)
+  set(GKlib_COPTS "-g")
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -DDEBUG")
+endif()
+
+if(GPROF)
+  set(GKlib_COPTS "-pg")
+endif()
+
+# Define NDEBUG / NDEBUG2 unless the ASSERT / ASSERT2 options are switched on.
+if(NOT ASSERT)
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -DNDEBUG")
+endif()
+
+if(NOT ASSERT2)
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -DNDEBUG2")
+endif()
+
+# Optional features: each enabled option adds its preprocessor define.
+if(PCRE)
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -D__WITHPCRE__")
+endif()
+
+if(GKREGEX)
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -DUSE_GKREGEX")
+endif()
+
+if(GKRAND)
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -DUSE_GKRAND")
+endif()
+
+
+# Probe for optional platform features and pass each positive result on as a define.
+check_include_file(execinfo.h HAVE_EXECINFO_H)
+if(HAVE_EXECINFO_H)
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -DHAVE_EXECINFO_H")
+endif()
+
+check_function_exists(getline HAVE_GETLINE)
+if(HAVE_GETLINE)
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -DHAVE_GETLINE")
+endif()
+
+
+# Custom check for TLS.
+if(MSVC)
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -D__thread=__declspec(thread)")
+  # NOTE(review): the probe below is not handed the define above -- confirm it can pass with MSVC.
+  # Run the probe only when no result is cached from an earlier configure run.
+  if(NOT DEFINED HAVE_THREADLOCALSTORAGE)
+    try_compile(HAVE_THREADLOCALSTORAGE
+      ${CMAKE_BINARY_DIR}
+      ${GKLIB_PATH}/conf/check_thread_storage.c)
+    if(HAVE_THREADLOCALSTORAGE)
+      message(STATUS "checking for thread-local storage - found")
+    else()
+      message(STATUS "checking for thread-local storage - not found")
+    endif()
+  endif()
+  if(NOT HAVE_THREADLOCALSTORAGE)
+    set(GKlib_COPTIONS "${GKlib_COPTIONS} -D__thread=")
+  endif()
+endif()
+
+# Append the options and optimisation switches collected above to CMAKE_C_FLAGS.
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${GKlib_COPTIONS} ${GKlib_COPTS}")
+
+# Collect the *.c / *.h files directly under GKLIB_PATH; globs are evaluated when CMake is (re)run.
+file(GLOB GKlib_sources ${GKLIB_PATH}/*.c)
+file(GLOB GKlib_includes ${GKLIB_PATH}/*.h)
diff --git a/LICENSE.txt b/LICENSE.txt
new file mode 100644
index 0000000..b61ca6f
--- /dev/null
+++ b/LICENSE.txt
@@ -0,0 +1,18 @@
+
+Copyright & License Notice
+---------------------------
+
+Copyright 1995-2018, Regents of the University of Minnesota
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 
+implied. See the License for the specific language governing 
+permissions and limitations under the License.
+
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..6ac97b9
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,87 @@
+# Configuration options; override them on the command line, e.g. "make config cc=icc".
+cc       = gcc
+prefix   = ~/local
+openmp   = not-set
+gdb      = not-set
+assert   = not-set
+assert2  = not-set
+debug    = not-set
+gprof    = not-set
+valgrind = not-set
+pcre     = not-set
+gkregex  = not-set
+gkrand   = not-set
+
+# An option left at "not-set" is not forwarded to cmake (see the ifneq tests below).
+# The build tree is build/<uname -s>-<uname -m>; blanks in the machine name become '_'.
+cputype = $(shell uname -m | sed "s/\\ /_/g")
+systype = $(shell uname -s)
+
+BUILDDIR = build/$(systype)-$(cputype)
+
+# Translate every option that was set into the matching -D<OPTION>=<value> cmake argument.
+CONFIG_FLAGS = -DCMAKE_VERBOSE_MAKEFILE=1
+ifneq ($(gdb), not-set)
+    CONFIG_FLAGS += -DGDB=$(gdb)
+endif
+ifneq ($(assert), not-set)
+    CONFIG_FLAGS += -DASSERT=$(assert)
+endif
+ifneq ($(assert2), not-set)
+    CONFIG_FLAGS += -DASSERT2=$(assert2)
+endif
+ifneq ($(debug), not-set)
+    CONFIG_FLAGS += -DDEBUG=$(debug)
+endif
+ifneq ($(gprof), not-set)
+    CONFIG_FLAGS += -DGPROF=$(gprof)
+endif
+ifneq ($(valgrind), not-set)
+    CONFIG_FLAGS += -DVALGRIND=$(valgrind)
+endif
+ifneq ($(openmp), not-set)
+    CONFIG_FLAGS += -DOPENMP=$(openmp)
+endif
+ifneq ($(pcre), not-set)
+    CONFIG_FLAGS += -DPCRE=$(pcre)
+endif
+ifneq ($(gkregex), not-set)
+    CONFIG_FLAGS += -DGKREGEX=$(gkregex)
+endif
+ifneq ($(gkrand), not-set)
+    CONFIG_FLAGS += -DGKRAND=$(gkrand)
+endif
+ifneq ($(prefix), not-set)
+    CONFIG_FLAGS += -DCMAKE_INSTALL_PREFIX=$(prefix)
+endif
+ifneq ($(cc), not-set)
+    CONFIG_FLAGS += -DCMAKE_C_COMPILER=$(cc)
+endif
+ifneq ($(cputype), x86_64)
+    CONFIG_FLAGS += -DNO_X86=$(cputype)
+endif
+
+define run-config
+mkdir -p $(BUILDDIR)
+cd $(BUILDDIR) && cmake $(CURDIR) $(CONFIG_FLAGS)
+endef
+# Forward build targets to the generated makefiles; $(MAKE) passes this invocation's flags down.
+all clean install: $(BUILDDIR)
+	$(MAKE) -C $(BUILDDIR) $@
+
+uninstall:
+	 xargs rm < $(BUILDDIR)/install_manifest.txt
+
+$(BUILDDIR):
+	$(run-config)
+
+config: distclean
+	$(run-config)
+
+distclean:
+	rm -rf $(BUILDDIR)
+
+remake:
+	find . -name CMakeLists.txt -exec touch {} ';'
+
+.PHONY: config distclean all clean install uninstall remake
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..f94eeea
--- /dev/null
+++ b/README.md
@@ -0,0 +1,54 @@
+# GKlib
+A library of various helper routines and frameworks used by many of the lab's software packages.
+
+## Build requirements
+ - CMake 2.8, found at http://www.cmake.org/, as well as GNU make. 
+
+Assuming that the above are available, two commands should suffice to 
+build the software:
+```
+make config 
+make
+```
+
+## Configuring the build
+The build is configured primarily by passing options to `make config`. For example:
+```
+make config cc=icc
+```
+
+would configure it to be built using icc.
+
+Configuration options are:
+```
+cc=[compiler]     - The C compiler to use [default: gcc]
+prefix=[PATH]     - Set the installation prefix [default: ~/local]
+openmp=set        - To build a version with OpenMP support
+```
+
+
+## Building and installing
+To build and install, run the following
+```
+make
+make install
+```
+
+By default, the library file, header file, and binaries will be installed in
+```
+~/local/lib
+~/local/include
+~/local/bin
+```
+
+## Other make commands
+    make uninstall 
+         Removes all files installed by 'make install'.
+   
+    make clean 
+         Removes all object files but retains the configuration options.
+   
+    make distclean 
+         Performs clean and completely removes the build directory.
+
+
diff --git a/SETUP b/SETUP
new file mode 100755
index 0000000..a1d187d
--- /dev/null
+++ b/SETUP
@@ -0,0 +1,12 @@
+#!/usr/bin/env bash
+
+if [ -z "$1" ]
+then
+   printf "Usage: setup.sh install_directory [dependencies].\n" 1>&2
+   exit 1
+fi
+
+# Work from the directory holding this script (quoted so blanks survive); stop at the first failure.
+cd "$(dirname "$0")" || exit 1
+make config shared=1 prefix="$1" || exit 1
+make install
diff --git a/b64.c b/b64.c
new file mode 100644
index 0000000..afacd68
--- /dev/null
+++ b/b64.c
@@ -0,0 +1,95 @@
+/*! 
+\file  b64.c
+\brief This file contains some simple 8bit-to-6bit encoding/deconding routines
+
+Most of these routines are outdated and should be converted using glibc's equivalent
+routines.
+
+\date   Started 2/22/05
+\author George
+\version\verbatim $Id: b64.c 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
+
+\verbatim 
+$Copyright$ 
+$License$
+\endverbatim
+
+*/
+
+
+#include "GKlib.h"
+
+#define B64OFFSET       48      /* This is the '0' number */
+
+
+/******************************************************************************
+* Packs the 24 bits held in in[0..2] into four 6-bit values and stores each of
+* them, offset by B64OFFSET, in out[0..3].
+*******************************************************************************/
+void encodeblock(unsigned char *in, unsigned char *out)
+{
+  int k;
+
+  /* split the three input bytes into four 6-bit groups, most significant first */
+  out[0] = in[0] >> 2;
+  out[1] = ((in[0] & 0x03) << 4) | (in[1] >> 4);
+  out[2] = ((in[1] & 0x0f) << 2) | (in[2] >> 6);
+  out[3] = in[2] & 0x3f;
+
+  for (k=0; k<4; k++)
+    out[k] += B64OFFSET;
+}
+
+/******************************************************************************
+* Inverse of encodeblock: in[0..3] -> out[0..2]. B64OFFSET is subtracted from in[] in place.
+*******************************************************************************/
+void decodeblock(unsigned char *in, unsigned char *out)
+{
+  int k;
+
+  for (k=0; k<4; k++)
+    in[k] -= B64OFFSET;
+
+  out[0] = (in[0] << 2) | (in[1] >> 4);
+  out[1] = (in[1] << 4) | (in[2] >> 2);
+  out[2] = (in[2] << 6) | in[3];
+}
+
+
+/******************************************************************************
+* Encodes nbytes input bytes (nbytes must be a multiple of 3) into 4*nbytes/3
+* characters followed by a terminating '\0'. The caller supplies outbuffer
+* and has to make it large enough.
+*******************************************************************************/
+void GKEncodeBase64(int nbytes, unsigned char *inbuffer, unsigned char *outbuffer)
+{
+  int src=0, dst=0;
+
+  if (nbytes%3 != 0)
+    gk_errexit(SIGERR, "GKEncodeBase64: Input buffer size should be a multiple of 3! (%d)\n", nbytes);
+  while (src < nbytes) {
+    encodeblock(inbuffer+src, outbuffer+dst);
+    src += 3;
+    dst += 4;
+  }
+  outbuffer[dst] = '\0';
+}
+
+
+
+/******************************************************************************
+* Decodes nbytes characters produced by GKEncodeBase64 (nbytes must be a
+* multiple of 4) into 3*nbytes/4 bytes. The caller supplies a sufficiently
+* large outbuffer; inbuffer is modified in the process (see decodeblock).
+*******************************************************************************/
+void GKDecodeBase64(int nbytes, unsigned char *inbuffer, unsigned char *outbuffer)
+{
+  int src=0, dst=0;
+
+  if (nbytes%4 != 0)
+    gk_errexit(SIGERR, "GKDecodeBase64: Input buffer size should be a multiple of 4! (%d)\n", nbytes);
+
+  for (; src<nbytes; src+=4, dst+=3)
+    decodeblock(inbuffer+src, outbuffer+dst);
+}
+
diff --git a/blas.c b/blas.c
new file mode 100644
index 0000000..a0b95ca
--- /dev/null
+++ b/blas.c
@@ -0,0 +1,37 @@
+/*!
+\file blas.c
+\brief This file contains GKlib's implementation of BLAS-like routines
+
+The BLAS routines that are currently implemented are mostly level-one.
+They follow a naming convention of the type gk_[type][name], where
+[type] is one of c, i, f, and d, based on C's four standard scalar
+datatypes of characters, integers, floats, and doubles.
+
+These routines are implemented using a generic macro template,
+which is used for code generation.
+
+\date   Started 9/28/95
+\author George
+\version\verbatim $Id: blas.c 14330 2013-05-18 12:15:15Z karypis $ \endverbatim
+*/
+
+#include <GKlib.h>
+
+
+
+/*************************************************************************/
+/*! One GK_MKBLAS instantiation per scalar type: (name prefix, element type, third type -- role not visible here) */
+/*************************************************************************/
+GK_MKBLAS(gk_c,   char,     int)
+GK_MKBLAS(gk_i,   int,      int)
+GK_MKBLAS(gk_i32, int32_t,  int32_t)
+GK_MKBLAS(gk_i64, int64_t,  int64_t)
+GK_MKBLAS(gk_z,   ssize_t,  ssize_t)
+GK_MKBLAS(gk_zu,  size_t,   size_t)
+GK_MKBLAS(gk_f,   float,    float)
+GK_MKBLAS(gk_d,   double,   double)
+GK_MKBLAS(gk_idx, gk_idx_t, gk_idx_t)
+
+
+
+
diff --git a/cache.c b/cache.c
new file mode 100644
index 0000000..932e36d
--- /dev/null
+++ b/cache.c
@@ -0,0 +1,126 @@
+/*!
+\file 
+\brief Functions dealing with simulating cache behavior for performance
+       modeling and analysis;
+
+\date Started 4/13/18
+\author George
+\author Copyright 1997-2011, Regents of the University of Minnesota 
+\version $Id: cache.c 21991 2018-04-16 03:08:12Z karypis $
+*/
+
+#include <GKlib.h>
+
+
+/*************************************************************************/
+/*! Creates a zero-initialized cache simulator with nway entries per lookup
+    and 2^cnbits index positions; gk_cacheLoad shifts addresses right by lnbits.
+    \returns the new cache, to be released with gk_cacheDestroy. */
+/*************************************************************************/
+gk_cache_t *gk_cacheCreate(uint32_t nway, uint32_t lnbits, size_t cnbits)
+{
+  gk_cache_t *cache;
+
+  cache = (gk_cache_t *)gk_malloc(sizeof(gk_cache_t), "gk_cacheCreate: cache");
+  memset(cache, 0, sizeof(gk_cache_t));
+
+  cache->nway   = nway;
+  cache->lnbits = lnbits;
+  cache->cnbits = cnbits;
+  cache->csize  = ((size_t)1)<<cnbits;  /* shift in size_t: an int shift overflows for cnbits >= 31 */
+  cache->cmask  = cache->csize-1;
+
+  cache->latimes = gk_ui64smalloc(cache->csize*nway, 0, "gk_cacheCreate: latimes");
+  cache->clines  = gk_zusmalloc(cache->csize*nway, 0, "gk_cacheCreate: clines");
+  return cache;
+}
+
+
+/*************************************************************************/
+/*! Clears the hit/miss counters and zeroes every entry of the latimes and
+    clines arrays; the clock field is not touched. */
+/*************************************************************************/
+void gk_cacheReset(gk_cache_t *cache)
+{
+  size_t nslots = cache->csize*cache->nway;
+
+  gk_zuset(nslots, 0, cache->clines);
+  gk_ui64set(nslots, 0, cache->latimes);
+
+  cache->nmisses = 0;
+  cache->nhits   = 0;
+}
+
+
+/*************************************************************************/
+/*! Releases the clines and latimes arrays and the cache structure through
+    gk_free, then stores NULL in *r_cache. A NULL *r_cache is left alone. */
+/*************************************************************************/
+void gk_cacheDestroy(gk_cache_t **r_cache)
+{
+  gk_cache_t *cache = *r_cache;
+
+  if (cache != NULL) {
+    /* gk_free is given the local copy, so the caller's pointer is reset by hand */
+    gk_free((void **)&cache->clines, &cache->latimes, &cache, LTERM);
+
+    *r_cache = NULL;
+  }
+}
+
+
+/*************************************************************************/
+/*! Simulates a load from address addr and updates the hit/miss counters.
+    The address is shifted right by lnbits and the nway entries starting at
+    position (shifted address & cmask) are searched for it.
+    \returns 1 in all cases.
+ */
+/*************************************************************************/
+int gk_cacheLoad(gk_cache_t *cache, size_t addr)
+{
+  uint32_t way, nway=cache->nway;
+  size_t victim=0;
+  size_t *clines;
+  uint64_t *latimes;
+
+  addr  >>= cache->lnbits;
+  clines  = cache->clines  + (addr&(cache->cmask));
+  latimes = cache->latimes + (addr&(cache->cmask));
+
+  cache->clock++;
+
+  /* hit: only the entry's last-access time needs refreshing */
+  for (way=0; way<nway; way++) {
+    if (clines[way] == addr) {
+      cache->nhits++;
+      latimes[way] = cache->clock;
+      return 1;
+    }
+  }
+
+  /* miss: replace the first entry holding 0 or, if there is none, the entry
+     with the oldest last-access time */
+  for (way=0; way<nway; way++) {
+    if (clines[way] == 0) {
+      victim = way;
+      break;
+    }
+    if (latimes[way] < latimes[victim])
+      victim = way;
+  }
+  cache->nmisses++;
+  clines[victim]  = addr;
+  latimes[victim] = cache->clock;
+  return 1;
+}
+
+
+/*************************************************************************/
+/*! \returns nhits/(clock+1), evaluated in double precision. */
+/*************************************************************************/
+double gk_cacheGetHitRate(gk_cache_t *cache)
+{
+  double hits=(double)cache->nhits, denom=(double)(cache->clock+1);
+  return hits/denom;
+}
+
diff --git a/conf/check_thread_storage.c b/conf/check_thread_storage.c
new file mode 100644
index 0000000..e6e1e98
--- /dev/null
+++ b/conf/check_thread_storage.c
@@ -0,0 +1,5 @@
+extern __thread int x;  /* compile probe: builds only if the compiler accepts the __thread qualifier */
+
+int main(int argc, char **argv) {
+  return 0;
+}
diff --git a/csr.c b/csr.c
new file mode 100644
index 0000000..7e92a0c
--- /dev/null
+++ b/csr.c
@@ -0,0 +1,3378 @@
+/*!
+ * \file 
+ *
+ * \brief Various routines with dealing with CSR matrices
+ *
+ * \author George Karypis
+ * \version\verbatim $Id: csr.c 21044 2017-05-24 22:50:32Z karypis $ \endverbatim
+ */
+
+#include <GKlib.h>
+
+#define OMPMINOPS       50000
+
+/*************************************************************************/
+/*! Allocates a CSR matrix structure and initializes it with gk_csr_Init.
+    \returns the allocated matrix, or NULL if gk_malloc returned NULL. */
+/**************************************************************************/
+gk_csr_t *gk_csr_Create()
+{
+  gk_csr_t *mat;
+
+  mat = (gk_csr_t *)gk_malloc(sizeof(gk_csr_t), "gk_csr_Create: mat");
+  if (mat == NULL)
+    return NULL;
+  gk_csr_Init(mat);
+  return mat;
+}
+
+
+/*************************************************************************/
+/*! Zero-fills the matrix structure and sets nrows and ncols to 0.
+    \param mat is the matrix to be initialized. */
+/*************************************************************************/
+void gk_csr_Init(gk_csr_t *mat)
+{
+  memset(mat, 0, sizeof(*mat));
+  mat->ncols = 0;
+  mat->nrows = 0;
+}
+
+
+/*************************************************************************/
+/*! Releases a matrix: first its fields (gk_csr_FreeContents), then the
+    structure itself. Nothing is done when *mat is NULL.
+    \param mat is the address of the pointer to the matrix to be freed. */
+/*************************************************************************/
+void gk_csr_Free(gk_csr_t **mat)
+{
+  if (*mat != NULL) {
+    gk_csr_FreeContents(*mat);
+    gk_free((void **)mat, LTERM);
+  }
+}
+
+
+/*************************************************************************/
+/*! Hands the address of each array field of the matrix (row and column
+    structure, ids, labels, maps, norms, sums, sizes, volumes and weights)
+    to gk_free. The gk_csr_t structure itself is not released.
+    \param mat is the matrix whose contents will be freed.
+*/
+/*************************************************************************/
+void gk_csr_FreeContents(gk_csr_t *mat)
+{
+  gk_free((void **)&mat->rowptr, &mat->rowind, &mat->rowval,
+          &mat->rowids, &mat->rlabels, &mat->rmap,
+          &mat->colptr, &mat->colind, &mat->colval,
+          &mat->colids, &mat->clabels, &mat->cmap,
+          &mat->rnorms, &mat->cnorms, &mat->rsums, &mat->csums,
+          &mat->rsizes, &mat->csizes, &mat->rvols, &mat->cvols,
+          &mat->rwgts, &mat->cwgts, LTERM);
+}
+
+
+/*************************************************************************/
+/*! Returns a copy of a matrix. Each array handled below is duplicated when
+    it is non-NULL in mat; rmap and cmap are not copied.
+    \param mat is the matrix to be duplicated.
+    \returns the newly created copy of the matrix. */
+/**************************************************************************/
+gk_csr_t *gk_csr_Dup(gk_csr_t *mat)
+{
+  gk_csr_t *nmat;
+  int nr=mat->nrows, nc=mat->ncols;
+
+  nmat = gk_csr_Create();
+  nmat->nrows = nr;
+  nmat->ncols = nc;
+
+  /* row structure: per-row arrays first, then the rowptr[nrows] entries */
+  if (mat->rowptr)
+    nmat->rowptr = gk_zcopy(nr+1, mat->rowptr,
+                       gk_zmalloc(nr+1, "gk_csr_Dup: rowptr"));
+  if (mat->rowids)
+    nmat->rowids = gk_icopy(nr, mat->rowids,
+                       gk_imalloc(nr, "gk_csr_Dup: rowids"));
+  if (mat->rlabels)
+    nmat->rlabels = gk_icopy(nr, mat->rlabels,
+                       gk_imalloc(nr, "gk_csr_Dup: rlabels"));
+  if (mat->rnorms)
+    nmat->rnorms = gk_fcopy(nr, mat->rnorms,
+                       gk_fmalloc(nr, "gk_csr_Dup: rnorms"));
+  if (mat->rsums)
+    nmat->rsums = gk_fcopy(nr, mat->rsums,
+                       gk_fmalloc(nr, "gk_csr_Dup: rsums"));
+  if (mat->rsizes)
+    nmat->rsizes = gk_fcopy(nr, mat->rsizes,
+                       gk_fmalloc(nr, "gk_csr_Dup: rsizes"));
+  if (mat->rvols)
+    nmat->rvols = gk_fcopy(nr, mat->rvols,
+                       gk_fmalloc(nr, "gk_csr_Dup: rvols"));
+  if (mat->rwgts)
+    nmat->rwgts = gk_fcopy(nr, mat->rwgts,
+                       gk_fmalloc(nr, "gk_csr_Dup: rwgts"));
+  if (mat->rowind)
+    nmat->rowind = gk_icopy(mat->rowptr[nr], mat->rowind,
+                       gk_imalloc(mat->rowptr[nr], "gk_csr_Dup: rowind"));
+  if (mat->rowval)
+    nmat->rowval = gk_fcopy(mat->rowptr[nr], mat->rowval,
+                       gk_fmalloc(mat->rowptr[nr], "gk_csr_Dup: rowval"));
+
+  /* column structure: per-column arrays first, then the colptr[ncols] entries */
+  if (mat->colptr)
+    nmat->colptr = gk_zcopy(nc+1, mat->colptr,
+                       gk_zmalloc(nc+1, "gk_csr_Dup: colptr"));
+  if (mat->colids)
+    nmat->colids = gk_icopy(nc, mat->colids,
+                       gk_imalloc(nc, "gk_csr_Dup: colids"));
+  if (mat->clabels)
+    nmat->clabels = gk_icopy(nc, mat->clabels,
+                       gk_imalloc(nc, "gk_csr_Dup: clabels"));
+  if (mat->cnorms)
+    nmat->cnorms = gk_fcopy(nc, mat->cnorms,
+                       gk_fmalloc(nc, "gk_csr_Dup: cnorms"));
+  if (mat->csums)
+    nmat->csums = gk_fcopy(nc, mat->csums,
+                       gk_fmalloc(nc, "gk_csr_Dup: csums"));
+  if (mat->csizes)
+    nmat->csizes = gk_fcopy(nc, mat->csizes,
+                       gk_fmalloc(nc, "gk_csr_Dup: csizes"));
+  if (mat->cvols)
+    nmat->cvols = gk_fcopy(nc, mat->cvols,
+                       gk_fmalloc(nc, "gk_csr_Dup: cvols"));
+  if (mat->cwgts)
+    nmat->cwgts = gk_fcopy(nc, mat->cwgts,
+                       gk_fmalloc(nc, "gk_csr_Dup: cwgts"));
+  if (mat->colind)
+    nmat->colind = gk_icopy(mat->colptr[nc], mat->colind,
+                       gk_imalloc(mat->colptr[nc], "gk_csr_Dup: colind"));
+  if (mat->colval)
+    nmat->colval = gk_fcopy(mat->colptr[nc], mat->colval,
+                       gk_fmalloc(mat->colptr[nc], "gk_csr_Dup: colval"));
+
+  return nmat;
+}
+
+
+/*************************************************************************/
+/*! Returns a submatrix containing a set of consecutive rows.
+    \param mat is the original matrix.
+    \param rstart is the starting row.
+    \param nrows is the number of rows from rstart to extract.
+    \returns the row structure of the newly created submatrix, or NULL if
+             rstart/nrows do not describe a valid range of rows of mat.
+*/
+/**************************************************************************/
+gk_csr_t *gk_csr_ExtractSubmatrix(gk_csr_t *mat, int rstart, int nrows)
+{
+  ssize_t i, nnz=0;
+  gk_csr_t *nmat;
+
+  /* written so that no int overflow can occur for large rstart/nrows */
+  if (rstart < 0 || nrows < 0 || nrows > mat->nrows-rstart)
+    return NULL;
+
+  nmat = gk_csr_Create();
+  nmat->nrows  = nrows;
+  nmat->ncols  = mat->ncols;
+
+  /* copy the row pointers and rebase them so that they start at 0; the loop
+     runs downwards because rowptr[0] itself must be adjusted last */
+  if (mat->rowptr) {
+    nnz = mat->rowptr[rstart+nrows]-mat->rowptr[rstart];
+    nmat->rowptr = gk_zcopy(nrows+1, mat->rowptr+rstart, 
+                            gk_zmalloc(nrows+1, "gk_csr_ExtractSubmatrix: rowptr"));
+    for (i=nrows; i>=0; i--)
+      nmat->rowptr[i] -= nmat->rowptr[0];
+    ASSERT(nmat->rowptr[0] == 0);
+    ASSERT(nmat->rowptr[nrows] == nnz);
+  }
+
+  if (mat->rowids)
+    nmat->rowids = gk_icopy(nrows, mat->rowids+rstart, 
+                            gk_imalloc(nrows, "gk_csr_ExtractSubmatrix: rowids"));
+  if (mat->rnorms)
+    nmat->rnorms = gk_fcopy(nrows, mat->rnorms+rstart, 
+                            gk_fmalloc(nrows, "gk_csr_ExtractSubmatrix: rnorms"));
+  if (mat->rsums)
+    nmat->rsums = gk_fcopy(nrows, mat->rsums+rstart, 
+                            gk_fmalloc(nrows, "gk_csr_ExtractSubmatrix: rsums"));
+
+  /* the entries can only be located through the row pointers */
+  if (mat->rowptr && mat->rowind)
+    nmat->rowind = gk_icopy(nnz, mat->rowind+mat->rowptr[rstart], 
+                            gk_imalloc(nnz, "gk_csr_ExtractSubmatrix: rowind"));
+  if (mat->rowptr && mat->rowval)
+    nmat->rowval = gk_fcopy(nnz, mat->rowval+mat->rowptr[rstart], 
+                            gk_fmalloc(nnz, "gk_csr_ExtractSubmatrix: rowval"));
+
+  return nmat;
+}
+
+
+/*************************************************************************/
+/*! Returns a submatrix containing a certain set of rows.
+    \param mat is the original matrix.
+    \param nrows is the number of rows to extract.
+    \param rind is the set of row numbers to extract.
+    \returns the row structure of the newly created submatrix (without rowval if mat has none).
+*/
+/**************************************************************************/
+gk_csr_t *gk_csr_ExtractRows(gk_csr_t *mat, int nrows, int *rind)
+{
+  ssize_t i, ii, j, nnz;
+  gk_csr_t *nmat;
+
+  nmat = gk_csr_Create();
+  nmat->nrows = nrows;
+  nmat->ncols = mat->ncols;
+
+  for (nnz=0, i=0; i<nrows; i++)  
+    nnz += mat->rowptr[rind[i]+1]-mat->rowptr[rind[i]];
+
+  nmat->rowptr = gk_zmalloc(nmat->nrows+1, "gk_csr_ExtractRows: rowptr");
+  nmat->rowind = gk_imalloc(nnz, "gk_csr_ExtractRows: rowind");
+  if (mat->rowval)  /* mat may carry no values; then none are allocated or copied */
+    nmat->rowval = gk_fmalloc(nnz, "gk_csr_ExtractRows: rowval");
+
+  nmat->rowptr[0] = 0;
+  for (nnz=0, j=0, ii=0; ii<nrows; ii++) {
+    i = rind[ii];
+    gk_icopy(mat->rowptr[i+1]-mat->rowptr[i], mat->rowind+mat->rowptr[i], nmat->rowind+nnz);
+    if (mat->rowval)
+      gk_fcopy(mat->rowptr[i+1]-mat->rowptr[i], mat->rowval+mat->rowptr[i], nmat->rowval+nnz);
+    nnz += mat->rowptr[i+1]-mat->rowptr[i];
+    nmat->rowptr[++j] = nnz;
+  }
+  ASSERT(j == nmat->nrows);
+  return nmat;
+}
+
+
+/*************************************************************************/
+/*! Returns a submatrix corresponding to a specified partitioning of rows.
+    \param mat is the original matrix.
+    \param part is the partitioning vector of the rows.
+    \param pid is the partition ID that will be extracted.
+    \returns the row structure of the newly created submatrix (without rowval if mat has none).
+*/
+/**************************************************************************/
+gk_csr_t *gk_csr_ExtractPartition(gk_csr_t *mat, int *part, int pid)
+{
+  ssize_t i, j, nnz;
+  gk_csr_t *nmat;
+
+  nmat = gk_csr_Create();
+  nmat->nrows = 0;
+  nmat->ncols = mat->ncols;
+
+  for (nnz=0, i=0; i<mat->nrows; i++) {
+    if (part[i] == pid) {
+      nmat->nrows++;
+      nnz += mat->rowptr[i+1]-mat->rowptr[i];
+    }
+  }
+
+  nmat->rowptr = gk_zmalloc(nmat->nrows+1, "gk_csr_ExtractPartition: rowptr");
+  nmat->rowind = gk_imalloc(nnz, "gk_csr_ExtractPartition: rowind");
+  if (mat->rowval)  /* mat may carry no values; then none are allocated or copied */
+    nmat->rowval = gk_fmalloc(nnz, "gk_csr_ExtractPartition: rowval");
+
+  nmat->rowptr[0] = 0;
+  for (nnz=0, j=0, i=0; i<mat->nrows; i++) {
+    if (part[i] == pid) {
+      gk_icopy(mat->rowptr[i+1]-mat->rowptr[i], mat->rowind+mat->rowptr[i], nmat->rowind+nnz);
+      if (mat->rowval)
+        gk_fcopy(mat->rowptr[i+1]-mat->rowptr[i], mat->rowval+mat->rowptr[i], nmat->rowval+nnz);
+      nnz += mat->rowptr[i+1]-mat->rowptr[i];
+      nmat->rowptr[++j] = nnz;
+    }
+  }
+  ASSERT(j == nmat->nrows);
+  return nmat;
+}
+
+
+/*************************************************************************/
+/*! Splits the matrix into one sub-matrix per color.
+    \param mat is the original matrix.
+    \param color holds one color per non-zero of the matrix (row-wise
+           structure). Colors should be numbered consecutively from 0; the
+           number of sub-matrices is the largest color plus one.
+    \returns an array of matrices, indexed by color number.
+*/
+/**************************************************************************/
+gk_csr_t **gk_csr_Split(gk_csr_t *mat, int *color)
+{
+  ssize_t i, j, pos;
+  int c, nrows, ncolors;
+  ssize_t *rowptr;
+  int *rowind;
+  float *rowval;
+  gk_csr_t *smat, **smats;
+
+  nrows  = mat->nrows;
+  rowptr = mat->rowptr;
+  rowind = mat->rowind;
+  rowval = mat->rowval;
+
+  ncolors = gk_imax(rowptr[nrows], color, 1)+1;
+  smats = (gk_csr_t **)gk_malloc(sizeof(gk_csr_t *)*ncolors, "gk_csr_Split: smats");
+  for (c=0; c<ncolors; c++) {
+    smat = smats[c] = gk_csr_Create();
+    smat->nrows  = mat->nrows;
+    smat->ncols  = mat->ncols;
+    smat->rowptr = gk_zsmalloc(nrows+1, 0, "gk_csr_Split: smats[i]->rowptr"); 
+  }
+
+  for (i=0; i<nrows; i++) {
+    for (j=rowptr[i]; j<rowptr[i+1]; j++) 
+      smats[color[j]]->rowptr[i]++;
+  }
+
+  /* per-row counts -> row pointers (presumably what MAKECSR does), then size the arrays */
+  for (c=0; c<ncolors; c++) {
+    smat = smats[c];
+    MAKECSR(j, nrows, smat->rowptr);
+    smat->rowind = gk_imalloc(smat->rowptr[nrows], "gk_csr_Split: smats[i]->rowind"); 
+    smat->rowval = gk_fmalloc(smat->rowptr[nrows], "gk_csr_Split: smats[i]->rowval"); 
+  }
+
+  /* scatter the entries; rowptr[i] of the target matrix serves as the write cursor of row i */
+  for (i=0; i<nrows; i++) {
+    for (j=rowptr[i]; j<rowptr[i+1]; j++) {
+      smat = smats[color[j]];
+      pos  = smat->rowptr[i]++;
+      smat->rowind[pos] = rowind[j];
+      smat->rowval[pos] = rowval[j];
+    }
+  }
+
+  for (c=0; c<ncolors; c++) 
+    SHIFTCSR(j, nrows, smats[c]->rowptr);
+
+  return smats;
+}
+
+
+/**************************************************************************/
+/*! Determines the format of the CSR matrix based on the extension.
+    \param filename is the name of the file. It may be NULL, in which case
+           no extension is available and the default format is used.
+    \param format is the user-supplied format. Any value other than 
+           GK_CSR_FMT_AUTO is returned unchanged.
+    \returns the type. The extension of the file directly maps to the
+           name of the format ("csr", "ijv", "cluto", "metis", "binrow",
+           "bincol", "bijv"); any other extension maps to GK_CSR_FMT_CSR.
+*/
+/**************************************************************************/
+int gk_csr_DetermineFormat(char *filename, int format)
+{
+  char *extension;
+
+  if (format != GK_CSR_FMT_AUTO)
+    return format;
+
+  format = GK_CSR_FMT_CSR;
+
+  /* callers such as gk_csr_Write() accept a NULL filename (meaning stdout);
+     there is no extension to inspect then, so keep the default */
+  if (filename == NULL)
+    return format;
+
+  /* the returned string is owned by this routine and released below */
+  extension = gk_getextname(filename);
+  if (extension == NULL)
+    return format;
+
+  if (!strcmp(extension, "csr"))
+    format = GK_CSR_FMT_CSR;
+  else if (!strcmp(extension, "ijv"))
+    format = GK_CSR_FMT_IJV;
+  else if (!strcmp(extension, "cluto"))
+    format = GK_CSR_FMT_CLUTO;
+  else if (!strcmp(extension, "metis"))
+    format = GK_CSR_FMT_METIS;
+  else if (!strcmp(extension, "binrow"))
+    format = GK_CSR_FMT_BINROW;
+  else if (!strcmp(extension, "bincol"))
+    format = GK_CSR_FMT_BINCOL;
+  else if (!strcmp(extension, "bijv"))
+    format = GK_CSR_FMT_BIJV;
+
+  gk_free((void **)&extension, LTERM);
+
+  return format;
+}
+
+
+/**************************************************************************/
+/*! Reads a CSR matrix from the supplied file and stores it in the matrix's 
+    forward (row) structure. The only exception is GK_CSR_FMT_BINCOL, whose
+    data is stored in the column structure (colptr/colind/colval).
+    \param filename is the file that stores the data.
+    \param format is one of GK_CSR_FMT_METIS, GK_CSR_FMT_CLUTO, 
+           GK_CSR_FMT_CSR, GK_CSR_FMT_IJV, GK_CSR_FMT_BIJV, 
+           GK_CSR_FMT_BINROW, GK_CSR_FMT_BINCOL specifying the type of the 
+           input format, or GK_CSR_FMT_AUTO to derive it from the file's
+           extension via gk_csr_DetermineFormat().
+           The GK_CSR_FMT_CSR does not contain a header
+           line, whereas the GK_CSR_FMT_BINROW is a binary format written 
+           by gk_csr_Write() using the same format specifier.
+    \param readvals is either 1 or 0, indicating if the file contains
+           values or it does not. It is honored for GK_CSR_FMT_CSR,
+           GK_CSR_FMT_IJV, GK_CSR_FMT_BINROW and GK_CSR_FMT_BINCOL; the 
+           other formats overwrite it (CLUTO always has values, METIS and
+           BIJV take it from the file header). For the text row formats 
+           handled by the common reader, a value of 0 still allocates
+           rowval (filled with 1.0), whereas a value of 2 leaves rowval
+           unallocated.
+    \param numbering is either 1 or 0, indicating if the numbering of the 
+           indices start from 1 or 0, respectively. If they start from 1, 
+           they are automatically decremented during input so that they
+           will start from 0. It is honored for GK_CSR_FMT_CSR and 
+           GK_CSR_FMT_IJV; CLUTO and METIS files are always treated as
+           1-based and the binary formats are read as stored.
+    \returns the matrix that was read. Malformed input is reported through
+           gk_errexit()/errexit().
+*/
+/**************************************************************************/
+gk_csr_t *gk_csr_Read(char *filename, int format, int readvals, int numbering)
+{
+  ssize_t i, k, l;
+  size_t nfields, nrows, ncols, nnz, fmt, ncon;
+  size_t lnlen;
+  ssize_t *rowptr;
+  int *rowind, *iinds, *jinds, ival;
+  float *rowval=NULL, *vals, fval;
+  int readsizes, readwgts;
+  char *line=NULL, *head, *tail, fmtstr[256];
+  FILE *fpin;
+  gk_csr_t *mat=NULL;
+
+  format = gk_csr_DetermineFormat(filename, format);
+
+  if (!gk_fexists(filename)) 
+    gk_errexit(SIGERR, "File %s does not exist!\n", filename);
+
+  switch (format) {
+    case GK_CSR_FMT_BINROW:
+      mat = gk_csr_Create();
+
+      fpin = gk_fopen(filename, "rb", "gk_csr_Read: fpin");
+      if (fread(&(mat->nrows), sizeof(int32_t), 1, fpin) != 1)
+        gk_errexit(SIGERR, "Failed to read the nrows from file %s!\n", filename);
+      if (fread(&(mat->ncols), sizeof(int32_t), 1, fpin) != 1)
+        gk_errexit(SIGERR, "Failed to read the ncols from file %s!\n", filename);
+      mat->rowptr = gk_zmalloc(mat->nrows+1, "gk_csr_Read: rowptr");
+      if (fread(mat->rowptr, sizeof(ssize_t), mat->nrows+1, fpin) != mat->nrows+1)
+        gk_errexit(SIGERR, "Failed to read the rowptr from file %s!\n", filename);
+      mat->rowind = gk_imalloc(mat->rowptr[mat->nrows], "gk_csr_Read: rowind");
+      if (fread(mat->rowind, sizeof(int32_t), mat->rowptr[mat->nrows], fpin) != mat->rowptr[mat->nrows])
+        gk_errexit(SIGERR, "Failed to read the rowind from file %s!\n", filename);
+      if (readvals == 1) {
+        mat->rowval = gk_fmalloc(mat->rowptr[mat->nrows], "gk_csr_Read: rowval");
+        if (fread(mat->rowval, sizeof(float), mat->rowptr[mat->nrows], fpin) != mat->rowptr[mat->nrows])
+          gk_errexit(SIGERR, "Failed to read the rowval from file %s!\n", filename);
+      }
+
+      gk_fclose(fpin);
+      return mat;
+
+    case GK_CSR_FMT_BINCOL:
+      mat = gk_csr_Create();
+
+      fpin = gk_fopen(filename, "rb", "gk_csr_Read: fpin");
+      if (fread(&(mat->nrows), sizeof(int32_t), 1, fpin) != 1)
+        gk_errexit(SIGERR, "Failed to read the nrows from file %s!\n", filename);
+      if (fread(&(mat->ncols), sizeof(int32_t), 1, fpin) != 1)
+        gk_errexit(SIGERR, "Failed to read the ncols from file %s!\n", filename);
+      mat->colptr = gk_zmalloc(mat->ncols+1, "gk_csr_Read: colptr");
+      if (fread(mat->colptr, sizeof(ssize_t), mat->ncols+1, fpin) != mat->ncols+1)
+        gk_errexit(SIGERR, "Failed to read the colptr from file %s!\n", filename);
+      mat->colind = gk_imalloc(mat->colptr[mat->ncols], "gk_csr_Read: colind");
+      if (fread(mat->colind, sizeof(int32_t), mat->colptr[mat->ncols], fpin) != mat->colptr[mat->ncols])
+        gk_errexit(SIGERR, "Failed to read the colind from file %s!\n", filename);
+      if (readvals) {
+        mat->colval = gk_fmalloc(mat->colptr[mat->ncols], "gk_csr_Read: colval");
+        if (fread(mat->colval, sizeof(float), mat->colptr[mat->ncols], fpin) != mat->colptr[mat->ncols])
+          gk_errexit(SIGERR, "Failed to read the colval from file %s!\n", filename);
+      }
+
+      gk_fclose(fpin);
+      return mat;
+
+
+    case GK_CSR_FMT_IJV:
+      /* nrows receives the number of lines and nnz the number of tokens */
+      gk_getfilestats(filename, &nrows, &nnz, NULL, NULL);
+
+      if (readvals == 1 && 3*nrows != nnz)
+        gk_errexit(SIGERR, "Error: The number of numbers (%zd %d) in the input file is not a multiple of 3.\n", nnz, readvals);
+      if (readvals == 0 && 2*nrows != nnz)
+        gk_errexit(SIGERR, "Error: The number of numbers (%zd %d) in the input file is not a multiple of 2.\n", nnz, readvals);
+
+      /* one (i, j[, v]) tuple per line */
+      nnz = nrows;
+      numbering = (numbering ? - 1 : 0);
+
+      /* read the data into three arrays */
+      iinds = gk_i32malloc(nnz, "iinds");
+      jinds = gk_i32malloc(nnz, "jinds");
+      vals  = (readvals ? gk_fmalloc(nnz, "vals") : NULL);
+
+      fpin = gk_fopen(filename, "r", "gk_csr_Read: fpin");
+      for (nrows=0, ncols=0, i=0; i<nnz; i++) {
+        if (readvals) {
+          if (fscanf(fpin, "%d %d %f", &iinds[i], &jinds[i], &vals[i]) != 3)
+            gk_errexit(SIGERR, "Error: Failed to read (i, j, val) for nnz: %zd.\n", i);
+        }
+        else {
+          if (fscanf(fpin, "%d %d", &iinds[i], &jinds[i]) != 2)
+            gk_errexit(SIGERR, "Error: Failed to read (i, j) value for nnz: %zd.\n", i);
+        }
+        iinds[i] += numbering;
+        jinds[i] += numbering;
+
+        /* a negative index would be converted to a huge unsigned value in
+           the comparisons below and later index rowptr out of bounds */
+        if (iinds[i] < 0 || jinds[i] < 0)
+          gk_errexit(SIGERR, "Error: Invalid (i, j) indices (%d, %d) after renumbering for nnz: %zd.\n", 
+              iinds[i], jinds[i], i);
+
+        if (nrows < iinds[i])
+          nrows = iinds[i];
+        if (ncols < jinds[i])
+          ncols = jinds[i];
+      }
+      /* the dimensions are the largest indices seen plus one */
+      nrows++;
+      ncols++;
+      gk_fclose(fpin);
+
+      /* convert (i, j, v) into a CSR matrix */
+      mat = gk_csr_Create();
+      mat->nrows = nrows;
+      mat->ncols = ncols;
+      rowptr = mat->rowptr = gk_zsmalloc(nrows+1, 0, "rowptr");
+      rowind = mat->rowind = gk_i32malloc(nnz, "rowind");
+      if (readvals)
+        rowval = mat->rowval = gk_fmalloc(nnz, "rowval");
+
+      for (i=0; i<nnz; i++)
+        rowptr[iinds[i]]++;
+      MAKECSR(i, nrows, rowptr);
+
+      for (i=0; i<nnz; i++) {
+        rowind[rowptr[iinds[i]]] = jinds[i];
+        if (readvals)
+          rowval[rowptr[iinds[i]]] = vals[i];
+        rowptr[iinds[i]]++;
+      }
+      SHIFTCSR(i, nrows, rowptr);
+
+      gk_free((void **)&iinds, &jinds, &vals, LTERM);
+
+      return mat;
+
+    case GK_CSR_FMT_BIJV:
+      mat = gk_csr_Create();
+
+      fpin = gk_fopen(filename, "rb", "gk_csr_Read: fpin");
+
+      if (fread(&(mat->nrows), sizeof(int32_t), 1, fpin) != 1)
+        gk_errexit(SIGERR, "Failed to read the nrows from file %s!\n", filename);
+      if (fread(&(mat->ncols), sizeof(int32_t), 1, fpin) != 1)
+        gk_errexit(SIGERR, "Failed to read the ncols from file %s!\n", filename);
+      if (fread(&nnz, sizeof(size_t), 1, fpin) != 1)
+        gk_errexit(SIGERR, "Failed to read the nnz from file %s!\n", filename);
+      if (fread(&readvals, sizeof(int32_t), 1, fpin) != 1)
+        gk_errexit(SIGERR, "Failed to read the readvals from file %s!\n", filename);
+
+      /* read the data into three arrays */
+      iinds = gk_i32malloc(nnz, "iinds");
+      jinds = gk_i32malloc(nnz, "jinds");
+      vals  = (readvals ? gk_fmalloc(nnz, "vals") : NULL);
+
+      for (i=0; i<nnz; i++) {
+        if (fread(&(iinds[i]), sizeof(int32_t), 1, fpin) != 1)
+          gk_errexit(SIGERR, "Failed to read iinds[i] from file %s!\n", filename);
+        if (fread(&(jinds[i]), sizeof(int32_t), 1, fpin) != 1)
+          gk_errexit(SIGERR, "Failed to read jinds[i] from file %s!\n", filename);
+        if (readvals) {
+          if (fread(&(vals[i]), sizeof(float), 1, fpin) != 1)
+            gk_errexit(SIGERR, "Failed to read vals[i] from file %s!\n", filename);
+        }
+
+        /* the row index is used to address rowptr, which is sized from the
+           nrows stored in the header */
+        if (iinds[i] < 0 || iinds[i] >= mat->nrows)
+          gk_errexit(SIGERR, "Row index %d is out of range in file %s!\n", iinds[i], filename);
+      }
+      gk_fclose(fpin);
+
+      /* convert (i, j, v) into a CSR matrix */
+      rowptr = mat->rowptr = gk_zsmalloc(mat->nrows+1, 0, "rowptr");
+      rowind = mat->rowind = gk_i32malloc(nnz, "rowind");
+      if (readvals)
+        rowval = mat->rowval = gk_fmalloc(nnz, "rowval");
+
+      for (i=0; i<nnz; i++)
+        rowptr[iinds[i]]++;
+      MAKECSR(i, mat->nrows, rowptr);
+
+      for (i=0; i<nnz; i++) {
+        rowind[rowptr[iinds[i]]] = jinds[i];
+        if (readvals)
+          rowval[rowptr[iinds[i]]] = vals[i];
+        rowptr[iinds[i]]++;
+      }
+      SHIFTCSR(i, mat->nrows, rowptr);
+
+      gk_free((void **)&iinds, &jinds, &vals, LTERM);
+
+      return mat;
+
+
+    /* the following are handled by a common input code, that comes after the switch */
+
+    case GK_CSR_FMT_CLUTO:
+      fpin = gk_fopen(filename, "r", "gk_csr_Read: fpin");
+      /* skip the comment lines that precede the header */
+      do {
+        if (gk_getline(&line, &lnlen, fpin) <= 0)
+          gk_errexit(SIGERR, "Premature end of input file: file:%s\n", filename);
+      } while (line[0] == '%');
+
+      if (sscanf(line, "%zu %zu %zu", &nrows, &ncols, &nnz) != 3)
+        gk_errexit(SIGERR, "Header line must contain 3 integers.\n");
+
+      readsizes = 0;
+      readwgts  = 0;
+      readvals  = 1;
+      numbering = 1;
+
+      break;
+
+    case GK_CSR_FMT_METIS:
+      fpin = gk_fopen(filename, "r", "gk_csr_Read: fpin");
+      /* skip the comment lines that precede the header */
+      do {
+        if (gk_getline(&line, &lnlen, fpin) <= 0)
+          gk_errexit(SIGERR, "Premature end of input file: file:%s\n", filename);
+      } while (line[0] == '%');
+
+      fmt = ncon = 0;
+      nfields = sscanf(line, "%zu %zu %zu %zu", &nrows, &nnz, &fmt, &ncon);
+      if (nfields < 2)
+        gk_errexit(SIGERR, "Header line must contain at least 2 integers (#vtxs and #edges).\n");
+
+      /* the header stores the number of edges; each one appears in two rows */
+      ncols = nrows;
+      nnz *= 2;
+
+      if (fmt > 111)
+        gk_errexit(SIGERR, "Cannot read this type of file format [fmt=%zu]!\n", fmt);
+
+      /* the three decimal digits of fmt flag sizes, weights and values */
+      sprintf(fmtstr, "%03zu", fmt%1000);
+      readsizes = (fmtstr[0] == '1');
+      readwgts  = (fmtstr[1] == '1');
+      readvals  = (fmtstr[2] == '1');
+      numbering = 1;
+      ncon      = (ncon == 0 ? 1 : ncon);
+
+      break;
+
+    case GK_CSR_FMT_CSR:
+      readsizes = 0;
+      readwgts  = 0;
+
+      /* nrows receives the number of lines and nnz the number of tokens */
+      gk_getfilestats(filename, &nrows, &nnz, NULL, NULL);
+
+      if (readvals == 1 && nnz%2 == 1)
+        gk_errexit(SIGERR, "Error: The number of numbers (%zd %d) in the input file is not even.\n", nnz, readvals);
+      if (readvals == 1)
+        nnz = nnz/2;
+      fpin = gk_fopen(filename, "r", "gk_csr_Read: fpin");
+
+      break;
+
+    default:
+      gk_errexit(SIGERR, "Unknown csr format.\n");
+      return NULL;
+  }
+
+  mat = gk_csr_Create();
+
+  mat->nrows = nrows;
+
+  rowptr = mat->rowptr = gk_zmalloc(nrows+1, "gk_csr_Read: rowptr");
+  rowind = mat->rowind = gk_imalloc(nnz, "gk_csr_Read: rowind");
+  if (readvals != 2)
+    rowval = mat->rowval = gk_fsmalloc(nnz, 1.0, "gk_csr_Read: rowval");
+
+  if (readsizes)
+    mat->rsizes = gk_fsmalloc(nrows, 0.0, "gk_csr_Read: rsizes");
+
+  if (readwgts)
+    mat->rwgts = gk_fsmalloc(nrows*ncon, 0.0, "gk_csr_Read: rwgts");
+
+  /*----------------------------------------------------------------------
+   * Read the sparse matrix file
+   *---------------------------------------------------------------------*/
+  numbering = (numbering ? -1 : 0);
+  for (ncols=0, rowptr[0]=0, k=0, i=0; i<nrows; i++) {
+    do {
+      if (gk_getline(&line, &lnlen, fpin) == -1)
+        gk_errexit(SIGERR, "Premature end of input file: file while reading row %zd\n", i);
+    } while (line[0] == '%');
+
+    head = line;
+    tail = NULL;
+
+    /* Read vertex sizes */
+    if (readsizes) {
+#ifdef __MSC__
+      mat->rsizes[i] = (float)strtod(head, &tail);
+#else
+      mat->rsizes[i] = strtof(head, &tail);
+#endif
+      if (tail == head)
+        gk_errexit(SIGERR, "The line for vertex %zd does not have size information\n", i+1);
+      if (mat->rsizes[i] < 0)
+        errexit("The size for vertex %zd must be >= 0\n", i+1);
+      head = tail;
+    }
+
+    /* Read vertex weights */
+    if (readwgts) {
+      for (l=0; l<ncon; l++) {
+#ifdef __MSC__
+        mat->rwgts[i*ncon+l] = (float)strtod(head, &tail);
+#else
+        mat->rwgts[i*ncon+l] = strtof(head, &tail);
+#endif
+        if (tail == head)
+          errexit("The line for vertex %zd does not have enough weights "
+                  "for the %zu constraints.\n", i+1, ncon);
+        if (mat->rwgts[i*ncon+l] < 0)
+          errexit("The weight vertex %zd and constraint %zd must be >= 0\n", i+1, l);
+        head = tail;
+      }
+    }
+
+   
+    /* Read the rest of the row */
+    while (1) {
+      ival = (int)strtol(head, &tail, 0);
+      if (tail == head) 
+        break;
+      head = tail;
+
+      /* rowind/rowval hold exactly nnz entries; refuse to write past them
+         when the file contains more entries than it declared */
+      if ((size_t)k >= nnz)
+        gk_errexit(SIGERR, "gk_csr_Read: Row %zd has more entries than the %zu nonzeros "
+                           "expected for the input file.\n", i, nnz);
+      
+      if ((rowind[k] = ival + numbering) < 0)
+        gk_errexit(SIGERR, "Error: Invalid column number %d at row %zd.\n", ival, i);
+
+      /* ncols tracks the largest column index seen so far */
+      ncols = gk_max(rowind[k], ncols);
+
+      if (readvals == 1) {
+#ifdef __MSC__
+        fval = (float)strtod(head, &tail);
+#else
+        fval = strtof(head, &tail);
+#endif
+        if (tail == head)
+          gk_errexit(SIGERR, "Value could not be found for column! Row:%zd, NNZ:%zd\n", i, k);
+        head = tail;
+
+        rowval[k] = fval;
+      }
+      k++;
+    }
+    rowptr[i+1] = k;
+  }
+
+  if (format == GK_CSR_FMT_METIS) {
+    ASSERT(ncols+1 == mat->nrows);
+    mat->ncols = mat->nrows;
+  }
+  else {
+    mat->ncols = ncols+1;
+  }
+
+  if (k != nnz)
+    gk_errexit(SIGERR, "gk_csr_Read: Something wrong with the number of nonzeros in "
+                       "the input file. NNZ=%zd, ActualNNZ=%zd.\n", nnz, k);
+
+  gk_fclose(fpin);
+
+  gk_free((void **)&line, LTERM);
+
+  return mat;
+}
+
+
+/**************************************************************************/
+/*! Writes the structure of a matrix into a file.
+    \param mat is the matrix to be written,
+    \param filename is the name of the output file. For the METIS format
+           and for the plain-text formats handled by the default branch
+           (CLUTO/CSR) it may be NULL, in which case the output goes to
+           stdout; the remaining formats require a filename.
+    \param format is one of: GK_CSR_FMT_METIS, GK_CSR_FMT_CLUTO, 
+           GK_CSR_FMT_CSR, GK_CSR_FMT_IJV, GK_CSR_FMT_BINROW, 
+           GK_CSR_FMT_BINCOL, GK_CSR_FMT_BIJV. It is first resolved through
+           gk_csr_DetermineFormat(). GK_CSR_FMT_BINCOL writes the column
+           structure of the matrix; all other formats write the row
+           structure.
+    \param writevals is either 1 or 0 indicating if the values will be 
+           written or not. It is ignored by the METIS format (no values
+           are written) and forced to 1 by the CLUTO format.
+    \param numbering is either 1 or 0 indicating if the internal 0-based 
+           numbering will be shifted by one or not during output. It is
+           used by the IJV and CSR formats; METIS and CLUTO always shift 
+           by one and the binary formats never do.
+*/
+/**************************************************************************/
+void gk_csr_Write(gk_csr_t *mat, char *filename, int format, int writevals, int numbering)
+{
+  ssize_t row, e, nnz;
+  int shift;
+  int32_t edge[2];
+  FILE *fpout;
+
+  format = gk_csr_DetermineFormat(filename, format);
+
+  switch (format) {
+    case GK_CSR_FMT_METIS:
+      if (mat->nrows != mat->ncols || mat->rowptr[mat->nrows]%2 == 1)
+        gk_errexit(SIGERR, "METIS output format requires a square symmetric matrix.\n");
+
+      fpout = (filename ? gk_fopen(filename, "w", "gk_csr_Write: fpout") : stdout);
+
+      /* header: number of rows and half the number of stored entries */
+      fprintf(fpout, "%d %zd\n", mat->nrows, mat->rowptr[mat->nrows]/2);
+      for (row=0; row<mat->nrows; row++) {
+        for (e=mat->rowptr[row]; e<mat->rowptr[row+1]; e++) 
+          fprintf(fpout, " %d", mat->rowind[e]+1);
+        fprintf(fpout, "\n");
+      }
+
+      if (filename)
+        gk_fclose(fpout);
+      break;
+
+    case GK_CSR_FMT_BINROW:
+      if (filename == NULL)
+        gk_errexit(SIGERR, "The filename parameter cannot be NULL.\n");
+      fpout = gk_fopen(filename, "wb", "gk_csr_Write: fpout");
+
+      /* layout: nrows, ncols, rowptr[], rowind[], optionally rowval[] */
+      fwrite(&(mat->nrows), sizeof(int32_t), 1, fpout);
+      fwrite(&(mat->ncols), sizeof(int32_t), 1, fpout);
+      fwrite(mat->rowptr, sizeof(ssize_t), mat->nrows+1, fpout);
+      nnz = mat->rowptr[mat->nrows];
+      fwrite(mat->rowind, sizeof(int32_t), nnz, fpout);
+      if (writevals)
+        fwrite(mat->rowval, sizeof(float), nnz, fpout);
+
+      gk_fclose(fpout);
+      break;
+
+    case GK_CSR_FMT_BINCOL:
+      if (filename == NULL)
+        gk_errexit(SIGERR, "The filename parameter cannot be NULL.\n");
+      fpout = gk_fopen(filename, "wb", "gk_csr_Write: fpout");
+
+      /* layout: nrows, ncols, colptr[], colind[], optionally colval[] */
+      fwrite(&(mat->nrows), sizeof(int32_t), 1, fpout);
+      fwrite(&(mat->ncols), sizeof(int32_t), 1, fpout);
+      fwrite(mat->colptr, sizeof(ssize_t), mat->ncols+1, fpout);
+      nnz = mat->colptr[mat->ncols];
+      fwrite(mat->colind, sizeof(int32_t), nnz, fpout);
+      if (writevals)
+        fwrite(mat->colval, sizeof(float), nnz, fpout);
+
+      gk_fclose(fpout);
+      break;
+
+    case GK_CSR_FMT_IJV:
+      if (filename == NULL)
+        gk_errexit(SIGERR, "The filename parameter cannot be NULL.\n");
+      fpout = gk_fopen(filename, "w", "gk_csr_Write: fpout");
+
+      /* one "i j [v]" line per stored entry */
+      shift = (numbering ? 1 : 0);
+      for (row=0; row<mat->nrows; row++) {
+        for (e=mat->rowptr[row]; e<mat->rowptr[row+1]; e++) {
+          if (writevals)
+            fprintf(fpout, "%zd %d %.8f\n", row+shift, mat->rowind[e]+shift, mat->rowval[e]);
+          else
+            fprintf(fpout, "%zd %d\n", row+shift, mat->rowind[e]+shift);
+        }
+      }
+
+      gk_fclose(fpout);
+      break;
+
+    case GK_CSR_FMT_BIJV:
+      if (filename == NULL)
+        gk_errexit(SIGERR, "The filename parameter cannot be NULL.\n");
+      fpout = gk_fopen(filename, "wb", "gk_csr_Write: fpout");
+
+      /* header: nrows, ncols, number of entries, writevals flag */
+      fwrite(&(mat->nrows), sizeof(int32_t), 1, fpout);
+      fwrite(&(mat->ncols), sizeof(int32_t), 1, fpout);
+      fwrite(mat->rowptr+mat->nrows, sizeof(size_t), 1, fpout);
+      fwrite(&writevals, sizeof(int32_t), 1, fpout);
+
+      /* body: one (row, column) pair per entry, followed by its value
+         when values are written */
+      for (row=0; row<mat->nrows; row++) {
+        edge[0] = row;
+        for (e=mat->rowptr[row]; e<mat->rowptr[row+1]; e++) {
+          edge[1] = mat->rowind[e];
+          fwrite(edge, sizeof(int32_t), 2, fpout);
+          if (writevals)
+            fwrite(&(mat->rowval[e]), sizeof(float), 1, fpout);
+        }
+      }
+
+      gk_fclose(fpout);
+      break;
+
+    default:
+      fpout = (filename ? gk_fopen(filename, "w", "gk_csr_Write: fpout") : stdout);
+
+      /* CLUTO adds a header line and always has values and 1-based indices */
+      if (format == GK_CSR_FMT_CLUTO) {
+        fprintf(fpout, "%d %d %zd\n", mat->nrows, mat->ncols, mat->rowptr[mat->nrows]);
+        writevals = 1;
+        numbering = 1;
+      }
+
+      shift = (numbering ? 1 : 0);
+      for (row=0; row<mat->nrows; row++) {
+        for (e=mat->rowptr[row]; e<mat->rowptr[row+1]; e++) {
+          fprintf(fpout, " %d", mat->rowind[e]+shift);
+          if (writevals)
+            fprintf(fpout, " %f", mat->rowval[e]);
+        }
+        fprintf(fpout, "\n");
+      }
+
+      if (filename)
+        gk_fclose(fpout);
+      break;
+  }
+}
+
+
+/*************************************************************************/
+/*! Prunes certain rows/columns of the matrix by analyzing its row 
+    structure. The entries of the pruned rows/columns are dropped, but the
+    numbering of the remaining rows/columns is not affected (the result has
+    the same dimensions as the input).
+   
+    \param mat the matrix to be pruned,
+    \param what indicates if the rows (GK_CSR_ROW) or the columns (GK_CSR_COL)
+           of the matrix will be pruned,
+    \param minf is the minimum number of entries that a row (column) must
+           have in order to be kept,
+    \param maxf is the maximum number of entries that a row (column) may
+           have in order to be kept.
+    \returns the pruned matrix consisting only of its row-based structure. 
+          The input matrix is not modified. 
+*/
+/**************************************************************************/
+gk_csr_t *gk_csr_Prune(gk_csr_t *mat, int what, int minf, int maxf)
+{
+  ssize_t r, e, len, nkept;
+  int nrows, ncols, keep;
+  ssize_t *rowptr, *nrowptr;
+  int *rowind, *nrowind, *collen;
+  float *rowval, *nrowval;
+  gk_csr_t *nmat;
+
+  nmat = gk_csr_Create();
+  
+  nrows = nmat->nrows = mat->nrows;
+  ncols = nmat->ncols = mat->ncols;
+
+  rowptr = mat->rowptr;
+  rowind = mat->rowind;
+  rowval = mat->rowval;
+
+  /* the output can never hold more entries than the input */
+  nrowptr = nmat->rowptr = gk_zmalloc(nrows+1, "gk_csr_Prune: nrowptr");
+  nrowind = nmat->rowind = gk_imalloc(rowptr[nrows], "gk_csr_Prune: nrowind");
+  nrowval = nmat->rowval = gk_fmalloc(rowptr[nrows], "gk_csr_Prune: nrowval");
+
+
+  switch (what) {
+    case GK_CSR_COL:
+      collen = gk_ismalloc(ncols, 0, "gk_csr_Prune: collen");
+
+      /* count how many entries each column has */
+      for (r=0; r<nrows; r++) {
+        for (e=rowptr[r]; e<rowptr[r+1]; e++) {
+          ASSERT(rowind[e] < ncols);
+          collen[rowind[e]]++;
+        }
+      }
+
+      /* from here on collen[] is a 0/1 keep flag per column */
+      for (r=0; r<ncols; r++) {
+        if (collen[r] >= minf && collen[r] <= maxf)
+          collen[r] = 1;
+        else
+          collen[r] = 0;
+      }
+
+      nkept = 0;
+      nrowptr[0] = 0;
+      for (r=0; r<nrows; r++) {
+        for (e=rowptr[r]; e<rowptr[r+1]; e++) {
+          if (!collen[rowind[e]])
+            continue;
+
+          nrowind[nkept] = rowind[e];
+          nrowval[nkept] = rowval[e];
+          nkept++;
+        }
+        nrowptr[r+1] = nkept;
+      }
+      gk_free((void **)&collen, LTERM);
+      break;
+
+    case GK_CSR_ROW:
+      nkept = 0;
+      nrowptr[0] = 0;
+      for (r=0; r<nrows; r++) {
+        len  = rowptr[r+1]-rowptr[r];
+        keep = (len >= minf && len <= maxf);
+
+        /* a pruned row stays in the matrix as an empty row */
+        for (e=rowptr[r]; keep && e<rowptr[r+1]; e++, nkept++) {
+          nrowind[nkept] = rowind[e];
+          nrowval[nkept] = rowval[e];
+        }
+        nrowptr[r+1] = nkept;
+      }
+      break;
+
+    default:
+      gk_csr_Free(&nmat);
+      gk_errexit(SIGERR, "Unknown prunning type of %d\n", what);
+      return NULL;
+  }
+
+  return nmat;
+}
+
+
+/*************************************************************************/
+/*! Eliminates certain entries from the rows/columns of the matrix. The 
+    filtering takes place by keeping only the highest weight entries whose
+    sum accounts for a certain fraction of the overall weight of the 
+    row/column.
+   
+    \param mat the matrix to be pruned. Its row structure is always read
+           (it sizes the output); the column structure is additionally 
+           required when what is GK_CSR_COL.
+    \param what indicates if the rows (GK_CSR_ROW) or the columns (GK_CSR_COL)
+           of the matrix will be pruned,
+    \param norm indicates the norm that will be used to aggregate the weights
+           and possible values are 1 or 2. A value of 1 sums the values; 
+           any other value sums their squares.
+    \param fraction is the fraction of the overall norm that will be retained
+           by the kept entries.
+    \returns the filtered matrix consisting only of its row-based structure. 
+           The input matrix is not modified. When what is GK_CSR_COL and 
+           the code is compiled with OpenMP, the order of the entries 
+           within a row of the result depends on thread scheduling.
+*/
+/**************************************************************************/
+gk_csr_t *gk_csr_LowFilter(gk_csr_t *mat, int what, int norm, float fraction)
+{
+  ssize_t i, j, nnz;
+  int nrows, ncols, ncand, maxlen=0;
+  ssize_t *rowptr, *colptr, *nrowptr;
+  int *rowind, *colind, *nrowind;
+  float *rowval, *colval, *nrowval, rsum, tsum;
+  gk_csr_t *nmat;
+  gk_fkv_t *cand;
+
+  nmat = gk_csr_Create();
+  
+  nrows = nmat->nrows = mat->nrows;
+  ncols = nmat->ncols = mat->ncols;
+
+  rowptr = mat->rowptr;
+  rowind = mat->rowind;
+  rowval = mat->rowval;
+  colptr = mat->colptr;
+  colind = mat->colind;
+  colval = mat->colval;
+
+  /* the output can never hold more entries than the input */
+  nrowptr = nmat->rowptr = gk_zmalloc(nrows+1, "gk_csr_LowFilter: nrowptr");
+  nrowind = nmat->rowind = gk_imalloc(rowptr[nrows], "gk_csr_LowFilter: nrowind");
+  nrowval = nmat->rowval = gk_fmalloc(rowptr[nrows], "gk_csr_LowFilter: nrowval");
+
+
+  switch (what) {
+    case GK_CSR_COL:
+      if (mat->colptr == NULL) 
+        gk_errexit(SIGERR, "Cannot filter columns when column-based structure has not been created.\n");
+
+      /* nrowptr[r] starts at the beginning of row r in the original layout
+         and is advanced as an insertion cursor while entries are kept */
+      gk_zcopy(nrows+1, rowptr, nrowptr);
+
+      for (i=0; i<ncols; i++) 
+        maxlen = gk_max(maxlen, colptr[i+1]-colptr[i]);
+
+      #pragma omp parallel private(i, j, ncand, rsum, tsum, cand)
+      {
+        /* per-thread scratch space, large enough for the longest column */
+        cand = gk_fkvmalloc(maxlen, "gk_csr_LowFilter: cand");
+
+        #pragma omp for schedule(static)
+        for (i=0; i<ncols; i++) {
+          for (tsum=0.0, ncand=0, j=colptr[i]; j<colptr[i+1]; j++, ncand++) {
+            cand[ncand].val = colind[j];
+            cand[ncand].key = colval[j];
+            tsum += (norm == 1 ? colval[j] : colval[j]*colval[j]);
+          }
+          gk_fkvsortd(ncand, cand);
+
+          for (rsum=0.0, j=0; j<ncand && rsum<=fraction*tsum; j++) {
+            rsum += (norm == 1 ? cand[j].key : cand[j].key*cand[j].key);
+
+            /* Columns processed by different threads can contain the same
+               row, so the read-use-increment of that row's insertion cursor
+               has to be executed by one thread at a time. */
+            #pragma omp critical (gk_csr_LowFilter_scatter)
+            {
+              nrowind[nrowptr[cand[j].val]] = i;
+              nrowval[nrowptr[cand[j].val]] = cand[j].key;
+              nrowptr[cand[j].val]++;
+            }
+          }
+        }
+
+        gk_free((void **)&cand, LTERM);
+      }
+
+      /* compact the nrowind/nrowval: row i currently occupies the range
+         [rowptr[i], nrowptr[i]) of the over-allocated arrays */
+      for (nnz=0, i=0; i<nrows; i++) {
+        for (j=rowptr[i]; j<nrowptr[i]; j++, nnz++) {
+          nrowind[nnz] = nrowind[j];
+          nrowval[nnz] = nrowval[j];
+        }
+        nrowptr[i] = nnz;
+      }
+      SHIFTCSR(i, nrows, nrowptr);
+
+      break;
+
+    case GK_CSR_ROW:
+      if (mat->rowptr == NULL) 
+        gk_errexit(SIGERR, "Cannot filter rows when row-based structure has not been created.\n");
+
+      for (i=0; i<nrows; i++) 
+        maxlen = gk_max(maxlen, rowptr[i+1]-rowptr[i]);
+
+      #pragma omp parallel private(i, j, ncand, rsum, tsum, cand)
+      {
+        /* per-thread scratch space, large enough for the longest row */
+        cand = gk_fkvmalloc(maxlen, "gk_csr_LowFilter: cand");
+
+        /* every iteration only writes inside its own row's range, so no
+           synchronization is needed here */
+        #pragma omp for schedule(static)
+        for (i=0; i<nrows; i++) {
+          for (tsum=0.0, ncand=0, j=rowptr[i]; j<rowptr[i+1]; j++, ncand++) {
+            cand[ncand].val = rowind[j];
+            cand[ncand].key = rowval[j];
+            tsum += (norm == 1 ? rowval[j] : rowval[j]*rowval[j]);
+          }
+          gk_fkvsortd(ncand, cand);
+
+          for (rsum=0.0, j=0; j<ncand && rsum<=fraction*tsum; j++) {
+            rsum += (norm == 1 ? cand[j].key : cand[j].key*cand[j].key);
+            nrowind[rowptr[i]+j] = cand[j].val;
+            nrowval[rowptr[i]+j] = cand[j].key;
+          }
+          /* temporarily records where the kept entries of row i end */
+          nrowptr[i+1] = rowptr[i]+j;
+        }
+
+        gk_free((void **)&cand, LTERM);
+      }
+
+      /* compact nrowind/nrowval: row i currently occupies the range
+         [rowptr[i], nrowptr[i+1]) of the over-allocated arrays */
+      nrowptr[0] = nnz = 0;
+      for (i=0; i<nrows; i++) {
+        for (j=rowptr[i]; j<nrowptr[i+1]; j++, nnz++) {
+          nrowind[nnz] = nrowind[j];
+          nrowval[nnz] = nrowval[j];
+        }
+        nrowptr[i+1] = nnz;
+      }
+
+      break;
+
+    default:
+      gk_csr_Free(&nmat);
+      gk_errexit(SIGERR, "Unknown prunning type of %d\n", what);
+      return NULL;
+  }
+
+  return nmat;
+}
+
+
+/*************************************************************************/
+/*! Eliminates certain entries from the rows/columns of the matrix. The 
+    filtering takes place by keeping only the highest weight top-K entries 
+    along each row/column and those entries whose weight is greater than
+    a specified value.
+   
+    \param mat the matrix to be pruned. Its row structure is always read
+           (it sizes the output); the column structure is additionally 
+           required when what is GK_CSR_COL.
+    \param what indicates if the rows (GK_CSR_ROW) or the columns (GK_CSR_COL)
+           of the matrix will be pruned,
+    \param topk is the number of the highest weight entries to keep.
+    \param keepval is the weight of a term above which will be kept. This
+           is used to select additional terms past the first topk: entries
+           beyond the top-K are kept as long as their weight is not 
+           smaller than keepval.
+    \returns the filtered matrix consisting only of its row-based structure. 
+           The input matrix is not modified. 
+*/
+/**************************************************************************/
+gk_csr_t *gk_csr_TopKPlusFilter(gk_csr_t *mat, int what, int topk, float keepval)
+{
+  ssize_t i, j, k, r, nnz;
+  int nrows, ncols, ncand;
+  ssize_t *rowptr, *colptr, *nrowptr;
+  int *rowind, *colind, *nrowind;
+  float *rowval, *colval, *nrowval;
+  gk_csr_t *nmat;
+  gk_fkv_t *cand;
+
+  nmat = gk_csr_Create();
+  
+  nrows = nmat->nrows = mat->nrows;
+  ncols = nmat->ncols = mat->ncols;
+
+  rowptr = mat->rowptr;
+  rowind = mat->rowind;
+  rowval = mat->rowval;
+  colptr = mat->colptr;
+  colind = mat->colind;
+  colval = mat->colval;
+
+  /* the output can never hold more entries than the input */
+  nrowptr = nmat->rowptr = gk_zmalloc(nrows+1, "gk_csr_TopKPlusFilter: nrowptr");
+  nrowind = nmat->rowind = gk_imalloc(rowptr[nrows], "gk_csr_TopKPlusFilter: nrowind");
+  nrowval = nmat->rowval = gk_fmalloc(rowptr[nrows], "gk_csr_TopKPlusFilter: nrowval");
+
+
+  switch (what) {
+    case GK_CSR_COL:
+      if (mat->colptr == NULL) 
+        gk_errexit(SIGERR, "Cannot filter columns when column-based structure has not been created.\n");
+
+      cand = gk_fkvmalloc(nrows, "gk_csr_TopKPlusFilter: cand");
+
+      /* nrowptr[r] starts at the beginning of row r in the original layout
+         and is advanced as an insertion cursor while entries are kept */
+      gk_zcopy(nrows+1, rowptr, nrowptr);
+      for (i=0; i<ncols; i++) {
+        for (ncand=0, j=colptr[i]; j<colptr[i+1]; j++, ncand++) {
+          cand[ncand].val = colind[j];
+          cand[ncand].key = colval[j];
+        }
+        gk_fkvsortd(ncand, cand);
+
+        k = gk_min(topk, ncand);
+        for (j=0; j<ncand; j++) {
+          /* the first k candidates are kept unconditionally; after them,
+             stop at the first candidate whose weight is below keepval */
+          if (j >= k && cand[j].key < keepval) 
+            break;
+
+          r = cand[j].val;
+          nrowind[nrowptr[r]] = i;
+          nrowval[nrowptr[r]] = cand[j].key;
+          nrowptr[r]++;
+        }
+      }
+
+      /* compact the nrowind/nrowval: row i currently occupies the range
+         [rowptr[i], nrowptr[i]) of the over-allocated arrays */
+      for (nnz=0, i=0; i<nrows; i++) {
+        for (j=rowptr[i]; j<nrowptr[i]; j++, nnz++) {
+          nrowind[nnz] = nrowind[j];
+          nrowval[nnz] = nrowval[j];
+        }
+        nrowptr[i] = nnz;
+      }
+      SHIFTCSR(i, nrows, nrowptr);
+
+      gk_free((void **)&cand, LTERM);
+      break;
+
+    case GK_CSR_ROW:
+      if (mat->rowptr == NULL) 
+        gk_errexit(SIGERR, "Cannot filter rows when row-based structure has not been created.\n");
+
+      cand = gk_fkvmalloc(ncols, "gk_csr_TopKPlusFilter: cand");
+
+      nrowptr[0] = 0;
+      for (nnz=0, i=0; i<nrows; i++) {
+        for (ncand=0, j=rowptr[i]; j<rowptr[i+1]; j++, ncand++) {
+          cand[ncand].val = rowind[j];
+          cand[ncand].key = rowval[j];
+        }
+        gk_fkvsortd(ncand, cand);
+
+        k = gk_min(topk, ncand);
+        for (j=0; j<ncand; j++, nnz++) {
+          /* the first k candidates are kept unconditionally; after them,
+             stop at the first candidate whose weight is below keepval */
+          if (j >= k && cand[j].key < keepval) 
+            break;
+
+          nrowind[nnz] = cand[j].val;
+          nrowval[nnz] = cand[j].key;
+        }
+        nrowptr[i+1] = nnz;
+      }
+
+      gk_free((void **)&cand, LTERM);
+      break;
+
+    default:
+      gk_csr_Free(&nmat);
+      gk_errexit(SIGERR, "Unknown prunning type of %d\n", what);
+      return NULL;
+  }
+
+  return nmat;
+}
+
+
+/*************************************************************************/
+/*! Eliminates certain entries from the rows/columns of the matrix. The 
+    filtering takes place by keeping only the terms whose contribution to
+    the total length of the document is greater than a user-supplied multiple
+    over the average.
+
+    This routine assumes that the vectors are normalized to be unit length.
+   
+    \param mat the matrix to be pruned,
+    \param what indicates if the rows (GK_CSR_ROW) or the columns (GK_CSR_COL)
+           of the matrix will be pruned,
+    \param zscore is the multiplicative factor over the average contribution 
+           to the length of the document.
+    \returns the filtered matrix consisting only of its row-based structure. 
+           The input matrix is not modified. 
+*/
+/**************************************************************************/
+gk_csr_t *gk_csr_ZScoreFilter(gk_csr_t *mat, int what, float zscore)
+{
+  ssize_t i, j, nnz;
+  int nrows;
+  ssize_t *rowptr, *nrowptr;
+  int *rowind, *nrowind;
+  float *rowval, *nrowval, avgwgt;
+  gk_csr_t *nmat;
+
+  /* Reject unsupported requests before anything is allocated, so that no
+     half-initialized matrix is ever handed back to the caller should
+     gk_errexit() return. */
+  switch (what) {
+    case GK_CSR_ROW:
+      break;
+
+    case GK_CSR_COL:
+      gk_errexit(SIGERR, "This has not been implemented yet.\n");
+      return NULL;
+
+    default:
+      gk_errexit(SIGERR, "Unknown prunning type of %d\n", what);
+      return NULL;
+  }
+
+  /* rowptr[nrows] is needed to size the output, so check it first */
+  if (mat->rowptr == NULL) {
+    gk_errexit(SIGERR, "Cannot filter rows when row-based structure has not been created.\n");
+    return NULL;
+  }
+
+  nmat = gk_csr_Create();
+  
+  nmat->nrows = mat->nrows;
+  nmat->ncols = mat->ncols;
+
+  nrows  = mat->nrows; 
+  rowptr = mat->rowptr;
+  rowind = mat->rowind;
+  rowval = mat->rowval;
+
+  /* the output can never hold more entries than the input */
+  nrowptr = nmat->rowptr = gk_zmalloc(nrows+1, "gk_csr_ZScoreFilter: nrowptr");
+  nrowind = nmat->rowind = gk_imalloc(rowptr[nrows], "gk_csr_ZScoreFilter: nrowind");
+  nrowval = nmat->rowval = gk_fmalloc(rowptr[nrows], "gk_csr_ZScoreFilter: nrowval");
+
+  nrowptr[0] = 0;
+  for (nnz=0, i=0; i<nrows; i++) {
+    /* per-row threshold: zscore divided by the number of entries in the
+       row; for an empty row the inner loop does not execute, so the 
+       resulting value is never used */
+    avgwgt = zscore/(rowptr[i+1]-rowptr[i]);
+    for (j=rowptr[i]; j<rowptr[i+1]; j++) {
+      if (rowval[j] > avgwgt) {
+        nrowind[nnz] = rowind[j];
+        nrowval[nnz] = rowval[j];
+        nnz++;
+      }
+    }
+    nrowptr[i+1] = nnz;
+  }
+
+  return nmat;
+}
+
+
+/*************************************************************************/
+/*! Compacts the column-space of the matrix by removing empty columns.
+    As a result of the compaction, the column numbers are renumbered. 
+    The compaction operation is done in place and only affects the row-based
+    representation of the matrix.
+    The new columns are ordered in decreasing frequency.
+   
+    \param mat the matrix whose empty columns will be removed.
+*/
+/**************************************************************************/
+void gk_csr_CompactColumns(gk_csr_t *mat)
+{
+  ssize_t pos, nnz;
+  int nrows, ncols, nkept;
+  ssize_t *rowptr;
+  int *rowind, *newid;
+  gk_ikv_t *freq;
+
+  nrows  = mat->nrows;
+  ncols  = mat->ncols;
+  rowptr = mat->rowptr;
+  rowind = mat->rowind;
+  nnz    = rowptr[nrows];
+
+  /* newid[c] will hold the new number of old column c */
+  newid = gk_imalloc(ncols, "gk_csr_CompactColumns: colmap");
+
+  /* freq[c] = (number of entries in column c, column c) */
+  freq = gk_ikvmalloc(ncols, "gk_csr_CompactColumns: clens");
+  for (pos=0; pos<ncols; pos++) {
+    freq[pos].val = pos;
+    freq[pos].key = 0;
+  }
+  for (pos=0; pos<nnz; pos++) 
+    freq[rowind[pos]].key++;
+
+  /* order the columns by decreasing number of entries */
+  gk_ikvsortd(ncols, freq);
+
+  /* number the non-empty columns in that order; the scan stops at the
+     first empty column, so empty columns receive no new number */
+  for (nkept=0, pos=0; pos<ncols && freq[pos].key > 0; pos++)
+    newid[freq[pos].val] = nkept++;
+
+  /* relabel the column indices of the row-based structure */
+  for (pos=0; pos<nnz; pos++) 
+    rowind[pos] = newid[rowind[pos]];
+
+  mat->ncols = nkept;
+
+  gk_free((void **)&newid, &freq, LTERM);
+}
+
+
+/*************************************************************************/
+/*! Sorts the indices in increasing order
+    \param mat the matrix itself,
+    \param what is either GK_CSR_ROW or GK_CSR_COL indicating which set of
+           indices to sort.
+*/
+/**************************************************************************/
+void gk_csr_SortIndices(gk_csr_t *mat, int what)
+{
+  int n, nn=0;
+  ssize_t *ptr;
+  int *ind;
+  float *val;
+
+  /* select the view (row- or column-based) whose indices will be sorted */
+  switch (what) {
+    case GK_CSR_ROW:
+      if (!mat->rowptr) {
+        gk_errexit(SIGERR, "Row-based view of the matrix does not exists.\n");
+        return;
+      }
+
+      n   = mat->nrows;
+      ptr = mat->rowptr;
+      ind = mat->rowind;
+      val = mat->rowval;
+      break;
+
+    case GK_CSR_COL:
+      if (!mat->colptr) {
+        gk_errexit(SIGERR, "Column-based view of the matrix does not exists.\n");
+        return;
+      }
+
+      n   = mat->ncols;
+      ptr = mat->colptr;
+      ind = mat->colind;
+      val = mat->colval;
+      break;
+
+    default:
+      gk_errexit(SIGERR, "Invalid index type of %d.\n", what);
+      return;
+  }
+
+  #pragma omp parallel if (n > 100)
+  {
+    ssize_t i, j, k;
+    gk_ikv_t *cand;
+    float *tval;
+
+    /* nn = length of the longest row/column. It is computed by a single
+       thread; the implicit barrier at the end of the single construct
+       ensures every thread sees the final value before sizing its own
+       scratch buffers. */
+    #pragma omp single
+    for (i=0; i<n; i++) 
+      nn = gk_max(nn, ptr[i+1]-ptr[i]);
+  
+    /* per-thread scratch space */
+    cand = gk_ikvmalloc(nn, "gk_csr_SortIndices: cand");
+    tval = gk_fmalloc(nn, "gk_csr_SortIndices: tval");
+  
+    #pragma omp for schedule(static)
+    for (i=0; i<n; i++) {
+      /* k is set if the indices of this row/column are not already sorted */
+      for (k=0, j=ptr[i]; j<ptr[i+1]; j++) {
+        if (j > ptr[i] && ind[j] < ind[j-1])
+          k = 1; /* an inversion */
+        cand[j-ptr[i]].val = j-ptr[i];
+        cand[j-ptr[i]].key = ind[j];
+        if (val) /* a NULL value array denotes a binary matrix */
+          tval[j-ptr[i]] = val[j];
+      }
+      if (k) {
+        gk_ikvsorti(ptr[i+1]-ptr[i], cand);
+        for (j=ptr[i]; j<ptr[i+1]; j++) {
+          ind[j] = cand[j-ptr[i]].key;
+          /* cand[].val is the pre-sort position, used to permute the values */
+          if (val)
+            val[j] = tval[cand[j-ptr[i]].val];
+        }
+      }
+    }
+
+    gk_free((void **)&cand, &tval, LTERM);
+  }
+
+}
+
+
+/*************************************************************************/
+/*! Creates a row/column index from the column/row data.
+    \param mat the matrix itself,
+    \param what is either GK_CSR_ROW or GK_CSR_COL indicating which index
+           will be created.
+*/
+/**************************************************************************/
+void gk_csr_CreateIndex(gk_csr_t *mat, int what)
+{
+  /* Builds the view named by `what` out of the opposite view: the column
+     structure from the row structure (GK_CSR_COL) or the row structure 
+     from the column structure (GK_CSR_ROW). Any existing arrays of the 
+     requested view are freed and replaced. */
+
+  /* 'f' stands for forward, 'r' stands for reverse */
+  ssize_t i, j, k, nf, nr;
+  ssize_t *fptr, *rptr;
+  int *find, *rind;
+  float *fval, *rval;
+
+  switch (what) {
+    case GK_CSR_COL:
+      /* forward = rows, reverse = columns */
+      nf   = mat->nrows;
+      fptr = mat->rowptr;
+      find = mat->rowind;
+      fval = mat->rowval;
+
+      if (mat->colptr) gk_free((void **)&mat->colptr, LTERM);
+      if (mat->colind) gk_free((void **)&mat->colind, LTERM);
+      if (mat->colval) gk_free((void **)&mat->colval, LTERM);
+
+      /* rptr is used as a counter array below; presumably gk_zsmalloc 
+         fills it with the supplied value (0) -- TODO confirm */
+      nr   = mat->ncols;
+      rptr = mat->colptr = gk_zsmalloc(nr+1, 0, "gk_csr_CreateIndex: rptr");
+      rind = mat->colind = gk_imalloc(fptr[nf], "gk_csr_CreateIndex: rind");
+      /* values are only created if the forward view has values */
+      rval = mat->colval = (fval ? gk_fmalloc(fptr[nf], "gk_csr_CreateIndex: rval") : NULL);
+      break;
+    case GK_CSR_ROW:
+      /* forward = columns, reverse = rows */
+      nf   = mat->ncols;
+      fptr = mat->colptr;
+      find = mat->colind;
+      fval = mat->colval;
+
+      if (mat->rowptr) gk_free((void **)&mat->rowptr, LTERM);
+      if (mat->rowind) gk_free((void **)&mat->rowind, LTERM);
+      if (mat->rowval) gk_free((void **)&mat->rowval, LTERM);
+
+      nr   = mat->nrows;
+      rptr = mat->rowptr = gk_zsmalloc(nr+1, 0, "gk_csr_CreateIndex: rptr");
+      rind = mat->rowind = gk_imalloc(fptr[nf], "gk_csr_CreateIndex: rind");
+      /* values are only created if the forward view has values */
+      rval = mat->rowval = (fval ? gk_fmalloc(fptr[nf], "gk_csr_CreateIndex: rval") : NULL);
+      break;
+    default:
+      gk_errexit(SIGERR, "Invalid index type of %d.\n", what);
+      return;
+  }
+
+
+  /* count how many forward entries reference each reverse index */
+  for (i=0; i<nf; i++) {
+    for (j=fptr[i]; j<fptr[i+1]; j++)
+      rptr[find[j]]++;
+  }
+  /* presumably converts the per-index counts into starting offsets, 
+     leaving the total in rptr[nr] -- TODO confirm against the macro */
+  MAKECSR(i, nr, rptr);
+  
+  /* Two fill strategies, chosen by the average number of entries per 
+     reverse index (more than 6 or not). Both advance rptr[] while 
+     scattering and rely on SHIFTCSR afterwards, presumably to restore 
+     the starting offsets -- TODO confirm against the macro. */
+  if (rptr[nr] > 6*nr) {
+    /* denser case: indices and values are scattered in separate passes */
+    for (i=0; i<nf; i++) {
+      for (j=fptr[i]; j<fptr[i+1]; j++) 
+        rind[rptr[find[j]]++] = i;
+    }
+    SHIFTCSR(i, nr, rptr);
+
+    if (fval) {
+      for (i=0; i<nf; i++) {
+        for (j=fptr[i]; j<fptr[i+1]; j++) 
+          rval[rptr[find[j]]++] = fval[j];
+      }
+      SHIFTCSR(i, nr, rptr);
+    }
+  }
+  else {
+    /* sparser case: indices and values are scattered in a single pass */
+    if (fval) {
+      for (i=0; i<nf; i++) {
+        for (j=fptr[i]; j<fptr[i+1]; j++) {
+          k = find[j];
+          rind[rptr[k]]   = i;
+          rval[rptr[k]++] = fval[j];
+        }
+      }
+    }
+    else {
+      for (i=0; i<nf; i++) {
+        for (j=fptr[i]; j<fptr[i+1]; j++) 
+          rind[rptr[find[j]]++] = i;
+      }
+    }
+    SHIFTCSR(i, nr, rptr);
+  }
+}
+
+
+/*************************************************************************/
+/*! Normalizes the rows/columns of the matrix to be unit 
+    length.
+    \param mat the matrix itself,
+    \param what indicates what will be normalized and is obtained by
+           specifying GK_CSR_ROW, GK_CSR_COL, GK_CSR_ROW|GK_CSR_COL. 
+    \param norm indicates what norm is to normalize to, 1: 1-norm, 2: 2-norm
+*/
+/**************************************************************************/
+/* Scales each of the n sparse vectors delimited by ptr[] so that its 
+   1-norm (norm == 1) or 2-norm (norm == 2) becomes one. A vector whose 
+   accumulated sum is not positive cannot be normalized and is left 
+   unchanged. */
+static void csr_NormalizeVectors(int n, ssize_t *ptr, float *val, int norm)
+{
+  ssize_t i, j;
+  float sum;
+
+  #pragma omp parallel for if (ptr[n] > OMPMINOPS) private(j,sum) schedule(static)
+  for (i=0; i<n; i++) {
+    sum = 0.0;
+    if (norm == 1) {
+      for (j=ptr[i]; j<ptr[i+1]; j++) 
+        sum += val[j]; /* assume val[j] > 0 */ 
+    }
+    else {
+      for (j=ptr[i]; j<ptr[i+1]; j++) 
+        sum += val[j]*val[j];
+    }
+
+    /* empty/all-zero vectors (and 1-norm sums that are not positive) are
+       skipped instead of being multiplied by the raw sum */
+    if (sum <= 0)
+      continue;
+
+    sum = (norm == 1 ? 1.0/sum : 1.0/sqrt(sum));
+    for (j=ptr[i]; j<ptr[i+1]; j++)
+      val[j] *= sum;
+  }
+}
+
+void gk_csr_Normalize(gk_csr_t *mat, int what, int norm)
+{
+  /* Only the 1- and 2-norm are supported. Any other value used to fall 
+     through with a zero scaling factor, wiping out all the values. */
+  if (norm != 1 && norm != 2) {
+    gk_errexit(SIGERR, "Unknown norm type of %d\n", norm);
+    return;
+  }
+
+  /* a view is normalized only if it was requested and it stores values */
+  if (what&GK_CSR_ROW && mat->rowval) 
+    csr_NormalizeVectors(mat->nrows, mat->rowptr, mat->rowval, norm);
+
+  if (what&GK_CSR_COL && mat->colval) 
+    csr_NormalizeVectors(mat->ncols, mat->colptr, mat->colval, norm);
+}
+
+
+/*************************************************************************/
+/*! Applies different row scaling methods.
+    \param mat the matrix itself,
+    \param type indicates the type of row scaling. Possible values are:
+           GK_CSR_MAXTF, GK_CSR_SQRT, GK_CSR_LOG, GK_CSR_IDF, GK_CSR_MAXTF2.
+*/
+/**************************************************************************/
+/* Applies TF' = base + mult*TF/MAX(|TF|) to every row. Empty rows are 
+   skipped, so no value outside the row is ever read. A row whose largest
+   magnitude is zero has every entry set to base, which is what a zero 
+   entry receives in any other row. */
+static void csr_ScaleByMaxTF(int nrows, ssize_t *rowptr, float *rowval, 
+          double base, double mult)
+{
+  ssize_t i, j;
+  float maxtf;
+
+  #pragma omp parallel for if (rowptr[nrows] > OMPMINOPS) private(j, maxtf) schedule(static)
+  for (i=0; i<nrows; i++) {
+    if (rowptr[i] == rowptr[i+1])
+      continue;
+
+    maxtf = fabs(rowval[rowptr[i]]);
+    for (j=rowptr[i]; j<rowptr[i+1]; j++) 
+      maxtf = (maxtf < fabs(rowval[j]) ? fabs(rowval[j]) : maxtf);
+
+    for (j=rowptr[i]; j<rowptr[i+1]; j++)
+      rowval[j] = (maxtf == 0.0 ? base : base + mult*rowval[j]/maxtf);
+  }
+}
+
+void gk_csr_Scale(gk_csr_t *mat, int type)
+{
+  ssize_t i, j;
+  int nrows, ncols, nnzcols, bgfreq;
+  ssize_t *rowptr;
+  int *rowind, *collen;
+  float *rowval, *cscale, maxtf;
+  double logscale = 1.0/log(2.0); /* turns natural logs into base-2 logs */
+
+  nrows  = mat->nrows;
+  rowptr = mat->rowptr;
+  rowind = mat->rowind;
+  rowval = mat->rowval;
+
+  (void)maxtf; /* the per-row maximum is now computed in csr_ScaleByMaxTF */
+
+  switch (type) {
+    case GK_CSR_MAXTF: /* TF' = .5 + .5*TF/MAX(TF) */
+      csr_ScaleByMaxTF(nrows, rowptr, rowval, .5, .5);
+      break;
+
+    case GK_CSR_MAXTF2: /* TF' = .1 + .9*TF/MAX(TF) */
+      csr_ScaleByMaxTF(nrows, rowptr, rowval, .1, .9);
+      break;
+
+    case GK_CSR_SQRT: /* TF' = .1+SQRT(TF) */
+      #pragma omp parallel for if (rowptr[nrows] > OMPMINOPS) private(j) schedule(static)
+      for (i=0; i<nrows; i++) {
+        for (j=rowptr[i]; j<rowptr[i+1]; j++) { 
+          if (rowval[j] != 0.0)
+            rowval[j] = .1+sign(rowval[j], sqrt(fabs(rowval[j])));
+        }
+      }
+      break;
+
+    case GK_CSR_POW25: /* TF' = .1+POW(TF,.25) */
+      #pragma omp parallel for if (rowptr[nrows] > OMPMINOPS) private(j) schedule(static)
+      for (i=0; i<nrows; i++) {
+        for (j=rowptr[i]; j<rowptr[i+1]; j++) { 
+          if (rowval[j] != 0.0)
+            rowval[j] = .1+sign(rowval[j], sqrt(sqrt(fabs(rowval[j]))));
+        }
+      }
+      break;
+
+    case GK_CSR_POW65: /* TF' = .1+POW(TF,.65) */
+      #pragma omp parallel for if (rowptr[nrows] > OMPMINOPS) private(j) schedule(static)
+      for (i=0; i<nrows; i++) {
+        for (j=rowptr[i]; j<rowptr[i+1]; j++) { 
+          if (rowval[j] != 0.0)
+            rowval[j] = .1+sign(rowval[j], powf(fabs(rowval[j]), .65));
+        }
+      }
+      break;
+
+    case GK_CSR_POW75: /* TF' = .1+POW(TF,.75) */
+      #pragma omp parallel for if (rowptr[nrows] > OMPMINOPS) private(j) schedule(static)
+      for (i=0; i<nrows; i++) {
+        for (j=rowptr[i]; j<rowptr[i+1]; j++) { 
+          if (rowval[j] != 0.0)
+            rowval[j] = .1+sign(rowval[j], powf(fabs(rowval[j]), .75));
+        }
+      }
+      break;
+
+    case GK_CSR_POW85: /* TF' = .1+POW(TF,.85) */
+      #pragma omp parallel for if (rowptr[nrows] > OMPMINOPS) private(j) schedule(static)
+      for (i=0; i<nrows; i++) {
+        for (j=rowptr[i]; j<rowptr[i+1]; j++) { 
+          if (rowval[j] != 0.0)
+            rowval[j] = .1+sign(rowval[j], powf(fabs(rowval[j]), .85));
+        }
+      }
+      break;
+
+    case GK_CSR_LOG: /* TF' = 1+log_2(TF) */
+      /* iterates directly over all the stored values; the sign of a 
+         negative value is carried over to its logarithm */
+      #pragma omp parallel for if (rowptr[nrows] > OMPMINOPS) schedule(static,32)
+      for (i=0; i<rowptr[nrows]; i++) {
+        if (rowval[i] != 0.0)
+          rowval[i] = 1+(rowval[i]>0.0 ? log(rowval[i]) : -log(-rowval[i]))*logscale;
+      }
+      break;
+
+    case GK_CSR_IDF: /* TF' = TF*IDF */
+      ncols  = mat->ncols;
+      cscale = gk_fmalloc(ncols, "gk_csr_Scale: cscale");
+      collen = gk_ismalloc(ncols, 0, "gk_csr_Scale: collen");
+
+      /* collen[c] = number of rows in which column c appears */
+      for (i=0; i<nrows; i++) {
+        for (j=rowptr[i]; j<rowptr[i+1]; j++)
+          collen[rowind[j]]++;
+      }
+
+      #pragma omp parallel for if (ncols > OMPMINOPS) schedule(static)
+      for (i=0; i<ncols; i++)
+        cscale[i] = (collen[i] > 0 ? log(1.0*nrows/collen[i]) : 0.0);
+
+      #pragma omp parallel for if (rowptr[nrows] > OMPMINOPS) private(j) schedule(static)
+      for (i=0; i<nrows; i++) {
+        for (j=rowptr[i]; j<rowptr[i+1]; j++)
+          rowval[j] *= cscale[rowind[j]];
+      }
+      
+      gk_free((void **)&cscale, &collen, LTERM);
+      break;
+
+    case GK_CSR_IDF2: /* TF' = TF*IDF */
+      ncols  = mat->ncols;
+      cscale = gk_fmalloc(ncols, "gk_csr_Scale: cscale");
+      collen = gk_ismalloc(ncols, 0, "gk_csr_Scale: collen");
+
+      /* collen[c] = number of rows in which column c appears */
+      for (i=0; i<nrows; i++) {
+        for (j=rowptr[i]; j<rowptr[i+1]; j++)
+          collen[rowind[j]]++;
+      }
+
+      nnzcols = 0;
+      #pragma omp parallel for if (ncols > OMPMINOPS) schedule(static) reduction(+:nnzcols)
+      for (i=0; i<ncols; i++)
+        nnzcols += (collen[i] > 0 ? 1 : 0);
+
+      /* background frequency: half the average length of the non-empty
+         columns, but at least 10; the floor is used directly when there 
+         are no non-empty columns to avoid dividing by zero */
+      bgfreq = (nnzcols > 0 ? gk_max(10, (ssize_t)(.5*rowptr[nrows]/nnzcols)) : 10);
+      /* NOTE(review): diagnostic output on stdout kept as-is for existing users */
+      printf("nnz: %zd, nnzcols: %d, bgfreq: %d\n", rowptr[nrows], nnzcols, bgfreq);
+
+      #pragma omp parallel for if (ncols > OMPMINOPS) schedule(static)
+      for (i=0; i<ncols; i++)
+        cscale[i] = (collen[i] > 0 ? log(1.0*(nrows+2*bgfreq)/(bgfreq+collen[i])) : 0.0);
+
+      #pragma omp parallel for if (rowptr[nrows] > OMPMINOPS) private(j) schedule(static)
+      for (i=0; i<nrows; i++) {
+        for (j=rowptr[i]; j<rowptr[i+1]; j++)
+          rowval[j] *= cscale[rowind[j]];
+      }
+
+      gk_free((void **)&cscale, &collen, LTERM);
+      break;
+
+    default:
+      gk_errexit(SIGERR, "Unknown scaling type of %d\n", type);
+  }
+}
+
+
+/*************************************************************************/
+/*! Computes the sums of the rows/columns
+    \param mat the matrix itself,
+    \param what is either GK_CSR_ROW or GK_CSR_COL indicating which 
+           sums to compute.
+*/
+/**************************************************************************/
+void gk_csr_ComputeSums(gk_csr_t *mat, int what)
+{
+  ssize_t v;
+  int nvecs;
+  ssize_t *offsets;
+  float *values, *totals;
+
+  /* pick the requested view and (re)allocate the array receiving its sums */
+  if (what == GK_CSR_ROW) {
+    nvecs   = mat->nrows;
+    offsets = mat->rowptr;
+    values  = mat->rowval;
+
+    if (mat->rsums) 
+      gk_free((void **)&mat->rsums, LTERM);
+
+    mat->rsums = gk_fsmalloc(nvecs, 0, "gk_csr_ComputeSums: sums");
+    totals     = mat->rsums;
+  }
+  else if (what == GK_CSR_COL) {
+    nvecs   = mat->ncols;
+    offsets = mat->colptr;
+    values  = mat->colval;
+
+    if (mat->csums) 
+      gk_free((void **)&mat->csums, LTERM);
+
+    mat->csums = gk_fsmalloc(nvecs, 0, "gk_csr_ComputeSums: sums");
+    totals     = mat->csums;
+  }
+  else {
+    gk_errexit(SIGERR, "Invalid sum type of %d.\n", what);
+    return;
+  }
+
+  if (!values) {
+    /* no stored values: the sum of a row/column is its number of entries */
+    #pragma omp parallel for if (offsets[nvecs] > OMPMINOPS) schedule(static)
+    for (v=0; v<nvecs; v++) 
+      totals[v] = offsets[v+1]-offsets[v];
+  }
+  else {
+    /* add up the stored values of each row/column */
+    #pragma omp parallel for if (offsets[nvecs] > OMPMINOPS) schedule(static)
+    for (v=0; v<nvecs; v++) 
+      totals[v] = gk_fsum(offsets[v+1]-offsets[v], values+offsets[v], 1);
+  }
+}
+
+
+/*************************************************************************/
+/*! Computes the norms of the rows/columns
+
+    \param mat the matrix itself,
+    \param what is either GK_CSR_ROW or GK_CSR_COL indicating which 
+           norms to compute.
+
+    \note If the rowval/colval arrays are NULL, the matrix is assumed
+          to be binary and the norms are computed accordingly.
+*/
+/**************************************************************************/
+void gk_csr_ComputeNorms(gk_csr_t *mat, int what)
+{
+  ssize_t i;
+  int n;
+  ssize_t *ptr;
+  float *val, *norms;
+
+  /* pick the requested view and (re)allocate the array receiving its 
+     norms; the result replaces any previous contents of rnorms/cnorms */
+  switch (what) {
+    case GK_CSR_ROW:
+      n   = mat->nrows;
+      ptr = mat->rowptr;
+      val = mat->rowval;
+
+      if (mat->rnorms) gk_free((void **)&mat->rnorms, LTERM);
+
+      norms = mat->rnorms = gk_fsmalloc(n, 0, "gk_csr_ComputeNorms: norms");
+      break;
+    case GK_CSR_COL:
+      n   = mat->ncols;
+      ptr = mat->colptr;
+      val = mat->colval;
+
+      if (mat->cnorms) gk_free((void **)&mat->cnorms, LTERM);
+
+      norms = mat->cnorms = gk_fsmalloc(n, 0, "gk_csr_ComputeNorms: norms");
+      break;
+    default:
+      gk_errexit(SIGERR, "Invalid norm type of %d.\n", what);
+      return;
+  }
+
+  if (val) {
+    /* 2-norm: square root of the dot-product of the vector with itself */
+    #pragma omp parallel for if (ptr[n] > OMPMINOPS) schedule(static)
+    for (i=0; i<n; i++) 
+      norms[i] = sqrt(gk_fdot(ptr[i+1]-ptr[i], val+ptr[i], 1, val+ptr[i], 1));
+  }
+  else {
+    /* binary matrix: every entry counts as 1, so the squared norm is the
+       number of entries */
+    #pragma omp parallel for if (ptr[n] > OMPMINOPS) schedule(static)
+    for (i=0; i<n; i++) 
+      norms[i] = sqrt(ptr[i+1]-ptr[i]);
+  }
+}
+
+
+/*************************************************************************/
+/*! Computes the squares of the norms of the rows/columns
+
+    \param mat the matrix itself,
+    \param what is either GK_CSR_ROW or GK_CSR_COL indicating which 
+           squared norms to compute.
+
+    \note If the rowval/colval arrays are NULL, the matrix is assumed
+          to be binary and the norms are computed accordingly.
+*/
+/**************************************************************************/
+void gk_csr_ComputeSquaredNorms(gk_csr_t *mat, int what)
+{
+  ssize_t i;
+  int n;
+  ssize_t *ptr;
+  float *val, *norms;
+
+  /* pick the requested view and (re)allocate the array receiving its 
+     squared norms; the result is stored in the same rnorms/cnorms field
+     that gk_csr_ComputeNorms() writes to */
+  switch (what) {
+    case GK_CSR_ROW:
+      n   = mat->nrows;
+      ptr = mat->rowptr;
+      val = mat->rowval;
+
+      if (mat->rnorms) gk_free((void **)&mat->rnorms, LTERM);
+
+      norms = mat->rnorms = gk_fsmalloc(n, 0, "gk_csr_ComputeSquaredNorms: norms");
+      break;
+    case GK_CSR_COL:
+      n   = mat->ncols;
+      ptr = mat->colptr;
+      val = mat->colval;
+
+      if (mat->cnorms) gk_free((void **)&mat->cnorms, LTERM);
+
+      norms = mat->cnorms = gk_fsmalloc(n, 0, "gk_csr_ComputeSquaredNorms: norms");
+      break;
+    default:
+      gk_errexit(SIGERR, "Invalid norm type of %d.\n", what);
+      return;
+  }
+
+  if (val) {
+    /* squared 2-norm: dot-product of the vector with itself */
+    #pragma omp parallel for if (ptr[n] > OMPMINOPS) schedule(static)
+    for (i=0; i<n; i++) 
+      norms[i] = gk_fdot(ptr[i+1]-ptr[i], val+ptr[i], 1, val+ptr[i], 1);
+  }
+  else {
+    /* binary matrix: every entry counts as 1, so the squared norm is the
+       number of entries */
+    #pragma omp parallel for if (ptr[n] > OMPMINOPS) schedule(static)
+    for (i=0; i<n; i++) 
+      norms[i] = ptr[i+1]-ptr[i];
+  }
+}
+
+
+/*************************************************************************/
+/*! Returns a new matrix whose rows/columns are shuffled.
+   
+    \param mat the matrix to be shuffled,
+    \param what indicates if the rows (GK_CSR_ROW), columns (GK_CSR_COL),
+           or both (GK_CSR_ROWCOL) will be shuffled,
+    \param symmetric indicates if the same shuffling will be applied to 
+           both rows and columns. This is only valid when nrows==ncols and 
+           GK_CSR_ROWCOL is specified.
+    \returns the shuffled matrix. 
+          The input matrix is not modified. 
+*/
+/**************************************************************************/
+/* Fills perm[0..n-1] with a random permutation: one initializing call to
+   gk_RandomPermute (flag 1) followed by 20 further calls (flag 0) on the
+   same array. */
+static void csr_ShufflePermutation(int n, int *perm)
+{
+  int pass;
+
+  gk_RandomPermute(n, perm, 1);
+  for (pass=0; pass<20; pass++)
+    gk_RandomPermute(n, perm, 0);
+}
+
+gk_csr_t *gk_csr_Shuffle(gk_csr_t *mat, int what, int symmetric)
+{
+  ssize_t i, j;
+  int nrows, ncols;
+  ssize_t *rowptr, *nrowptr;
+  int *rowind, *nrowind;
+  int *rperm, *cperm;
+  float *rowval, *nrowval;
+  gk_csr_t *nmat;
+
+  /* A symmetric shuffle copies the nrows-long row permutation into the 
+     ncols-long column permutation, so it must not proceed on a non-square
+     matrix even if gk_errexit() returns. */
+  if (what == GK_CSR_ROWCOL && symmetric && mat->nrows != mat->ncols) {
+    gk_errexit(SIGERR, "The matrix is not square for a symmetric rowcol shuffling.\n");
+    return NULL;
+  }
+
+  nrows  = mat->nrows;
+  ncols  = mat->ncols;
+  rowptr = mat->rowptr;
+  rowind = mat->rowind;
+  rowval = mat->rowval;
+
+  /* rperm[i]/cperm[i] give the new position of old row/column i */
+  rperm = gk_imalloc(nrows, "gk_csr_Shuffle: rperm");
+  cperm = gk_imalloc(ncols, "gk_csr_Shuffle: cperm");
+
+  switch (what) {
+    case GK_CSR_ROW:
+      csr_ShufflePermutation(nrows, rperm);
+
+      /* the columns keep their positions */
+      for (i=0; i<ncols; i++)
+        cperm[i] = i;
+      break;
+
+    case GK_CSR_COL:
+      csr_ShufflePermutation(ncols, cperm);
+
+      /* the rows keep their positions */
+      for (i=0; i<nrows; i++)
+        rperm[i] = i;
+      break;
+
+    case GK_CSR_ROWCOL:
+      csr_ShufflePermutation(nrows, rperm);
+
+      if (symmetric)
+        gk_icopy(nrows, rperm, cperm);
+      else 
+        csr_ShufflePermutation(ncols, cperm);
+      break;
+
+    default:
+      gk_free((void **)&rperm, &cperm, LTERM);
+      gk_errexit(SIGERR, "Unknown shuffling type of %d\n", what);
+      return NULL;
+  }
+
+  nmat = gk_csr_Create();
+  nmat->nrows = nrows;
+  nmat->ncols = ncols;
+
+  nrowptr = nmat->rowptr = gk_zmalloc(nrows+1, "gk_csr_Shuffle: nrowptr");
+  nrowind = nmat->rowind = gk_imalloc(rowptr[nrows], "gk_csr_Shuffle: nrowind");
+  nrowval = nmat->rowval = (rowval ? gk_fmalloc(rowptr[nrows], "gk_csr_Shuffle: nrowval") : NULL) ;
+
+  /* store the length of every row at its new position and turn the 
+     lengths into row pointers */
+  for (i=0; i<nrows; i++)
+    nrowptr[rperm[i]] = rowptr[i+1]-rowptr[i];
+  MAKECSR(i, nrows, nrowptr);
+
+  /* copy every row to its new position, renumbering its columns; nrowptr
+     is advanced while copying and restored by SHIFTCSR afterwards */
+  for (i=0; i<nrows; i++) {
+    for (j=rowptr[i]; j<rowptr[i+1]; j++) {
+      nrowind[nrowptr[rperm[i]]] = cperm[rowind[j]];
+      if (nrowval)
+        nrowval[nrowptr[rperm[i]]] = rowval[j];
+      nrowptr[rperm[i]]++;
+    }
+  }
+  SHIFTCSR(i, nrows, nrowptr);
+
+  gk_free((void **)&rperm, &cperm, LTERM);
+
+  return nmat;
+
+}
+
+
+/*************************************************************************/
+/*! Returns the transpose of the matrix.
+   
+    \param mat the matrix to be transposed,
+    \returns the transposed matrix. 
+          The input matrix is not modified. 
+*/
+/**************************************************************************/
+gk_csr_t *gk_csr_Transpose(gk_csr_t *mat)
+{
+  ssize_t *saved_colptr = mat->colptr;
+  int32_t *saved_colind = mat->colind;
+  float *saved_colval   = mat->colval;
+  gk_csr_t *tmat;
+
+  /* Detach the current column view (if any) so that gk_csr_CreateIndex()
+     builds a fresh one without freeing the arrays saved above. */
+  mat->colval = NULL;
+  mat->colind = NULL;
+  mat->colptr = NULL;
+
+  gk_csr_CreateIndex(mat, GK_CSR_COL);
+
+  /* The freshly built column view of mat becomes the row view of the
+     transpose, with the dimensions swapped. */
+  tmat = gk_csr_Create();
+  tmat->rowptr = mat->colptr;
+  tmat->rowind = mat->colind;
+  tmat->rowval = mat->colval;
+  tmat->nrows  = mat->ncols;
+  tmat->ncols  = mat->nrows;
+
+  /* Give mat back whatever column view it had on entry. */
+  mat->colval = saved_colval;
+  mat->colind = saved_colind;
+  mat->colptr = saved_colptr;
+
+  return tmat;
+
+}
+
+
+/*************************************************************************/
+/*! Computes the similarity between two rows/columns
+
+    \param mat the matrix itself. The routine assumes that the indices
+           are sorted in increasing order.
+    \param i1 is the first row/column,
+    \param i2 is the second row/column,
+    \param what is either GK_CSR_ROW or GK_CSR_COL indicating the type of
+           objects between which the similarity will be computed,
+    \param simtype is the type of similarity and is one of GK_CSR_COS,
+           GK_CSR_JAC, GK_CSR_MIN, GK_CSR_AMIN
+    \returns the similarity between the two rows/columns.
+*/
+/**************************************************************************/
+float gk_csr_ComputeSimilarity(gk_csr_t *mat, int i1, int i2, int what, 
+          int simtype)
+{
+  int nind1, nind2;
+  int *ind1, *ind2;
+  float *val1, *val2, stat1, stat2, sim;
+
+  /* locate the two sparse vectors (indices and values) to be compared */
+  switch (what) {
+    case GK_CSR_ROW:
+      if (!mat->rowptr) {
+        gk_errexit(SIGERR, "Row-based view of the matrix does not exists.\n");
+        return 0.0;
+      }
+      nind1 = mat->rowptr[i1+1]-mat->rowptr[i1];
+      nind2 = mat->rowptr[i2+1]-mat->rowptr[i2];
+      ind1  = mat->rowind + mat->rowptr[i1];
+      ind2  = mat->rowind + mat->rowptr[i2];
+      val1  = mat->rowval + mat->rowptr[i1];
+      val2  = mat->rowval + mat->rowptr[i2];
+      break;
+
+    case GK_CSR_COL:
+      if (!mat->colptr) {
+        gk_errexit(SIGERR, "Column-based view of the matrix does not exists.\n");
+        return 0.0;
+      }
+      nind1 = mat->colptr[i1+1]-mat->colptr[i1];
+      nind2 = mat->colptr[i2+1]-mat->colptr[i2];
+      ind1  = mat->colind + mat->colptr[i1];
+      ind2  = mat->colind + mat->colptr[i2];
+      val1  = mat->colval + mat->colptr[i1];
+      val2  = mat->colval + mat->colptr[i2];
+      break;
+
+    default:
+      gk_errexit(SIGERR, "Invalid index type of %d.\n", what);
+      return 0.0;
+  }
+
+
+  /* From here on i1/i2 are reused as cursors into the two vectors. The
+     merge loops run until BOTH vectors are exhausted, so that the entries
+     left over in the longer vector still contribute to stat1/stat2. */
+  switch (simtype) {
+    case GK_CSR_COS:
+    case GK_CSR_JAC:
+      /* sim = dot-product, stat1/stat2 = squared 2-norms */
+      sim = stat1 = stat2 = 0.0;
+      i1 = i2 = 0;
+      while (i1<nind1 || i2<nind2) {
+        if (i1 == nind1) {
+          stat2 += val2[i2]*val2[i2];
+          i2++;
+        }
+        else if (i2 == nind2) {
+          stat1 += val1[i1]*val1[i1];
+          i1++;
+        }
+        else if (ind1[i1] < ind2[i2]) {
+          stat1 += val1[i1]*val1[i1];
+          i1++;
+        }
+        else if (ind1[i1] > ind2[i2]) {
+          stat2 += val2[i2]*val2[i2];
+          i2++;
+        }
+        else {
+          sim   += val1[i1]*val2[i2];
+          stat1 += val1[i1]*val1[i1];
+          stat2 += val2[i2]*val2[i2];
+          i1++;
+          i2++;
+        }
+      }
+      if (simtype == GK_CSR_COS)
+        sim = (stat1*stat2 > 0.0 ? sim/sqrt(stat1*stat2) : 0.0);
+      else 
+        sim = (stat1+stat2-sim > 0.0 ? sim/(stat1+stat2-sim) : 0.0);
+      break;
+
+    case GK_CSR_MIN:
+    case GK_CSR_AMIN:
+      /* sim = sum of the element-wise minima over the common indices, 
+         stat1/stat2 = sums of the values */
+      sim = stat1 = stat2 = 0.0;
+      i1 = i2 = 0;
+      while (i1<nind1 || i2<nind2) {
+        if (i1 == nind1) {
+          stat2 += val2[i2];
+          i2++;
+        }
+        else if (i2 == nind2) {
+          stat1 += val1[i1];
+          i1++;
+        }
+        else if (ind1[i1] < ind2[i2]) {
+          stat1 += val1[i1];
+          i1++;
+        }
+        else if (ind1[i1] > ind2[i2]) {
+          stat2 += val2[i2];
+          i2++;
+        }
+        else {
+          sim   += gk_min(val1[i1],val2[i2]);
+          stat1 += val1[i1];
+          stat2 += val2[i2];
+          i1++;
+          i2++;
+        }
+      }
+      if (simtype == GK_CSR_MIN)
+        sim = (stat1+stat2-sim > 0.0 ? sim/(stat1+stat2-sim) : 0.0);
+      else /* GK_CSR_AMIN: asymmetric, relative to the first vector only */
+        sim = (stat1 > 0.0 ? sim/stat1 : 0.0);
+
+      break;
+
+    default:
+      gk_errexit(SIGERR, "Unknown similarity measure %d\n", simtype);
+      return -1;
+  }
+
+  return sim;
+
+}
+
+
+/*************************************************************************/
+/*! Computes the similarity between two rows/columns.
+    \param mat_a the first matrix. The routine assumes that the indices
+           are sorted in increasing order.
+    \param mat_b the second matrix. The routine assumes that the indices
+           are sorted in increasing order.
+    \param i1 is the row/column from the first matrix (mat_a),
+    \param i2 is the row/column from the second matrix (mat_b),
+    \param what is either GK_CSR_ROW or GK_CSR_COL indicating the type of
+           objects between which the similarity will be computed,
+    \param simtype is the type of similarity and is one of GK_CSR_COS,
+           GK_CSR_JAC, GK_CSR_MIN, GK_CSR_AMIN,
+    \returns the similarity between the two rows/columns. If gk_errexit()
+             returns: 0.0 for an invalid what, -1 for an unknown simtype.
+*/
+/**************************************************************************/
+float gk_csr_ComputePairSimilarity(gk_csr_t *mat_a, gk_csr_t *mat_b,
+          int i1, int i2, int what, int simtype)
+{
+  int nind1, nind2;
+  int *ind1, *ind2;
+  float *val1, *val2, stat1, stat2, sim;
+
+  switch (what) {
+    case GK_CSR_ROW:
+      if (!mat_a->rowptr || !mat_b->rowptr)
+        gk_errexit(SIGERR, "Row-based view of the matrix does not exists.\n");
+      nind1 = mat_a->rowptr[i1+1]-mat_a->rowptr[i1];
+      nind2 = mat_b->rowptr[i2+1]-mat_b->rowptr[i2];
+      ind1  = mat_a->rowind + mat_a->rowptr[i1];
+      ind2  = mat_b->rowind + mat_b->rowptr[i2];
+      val1  = mat_a->rowval + mat_a->rowptr[i1];
+      val2  = mat_b->rowval + mat_b->rowptr[i2];
+      break;
+
+    case GK_CSR_COL:
+      if (!mat_a->colptr || !mat_b->colptr)
+        gk_errexit(SIGERR, "Column-based view of the matrix does not exists.\n");
+      nind1 = mat_a->colptr[i1+1]-mat_a->colptr[i1];
+      nind2 = mat_b->colptr[i2+1]-mat_b->colptr[i2];
+      ind1  = mat_a->colind + mat_a->colptr[i1];
+      ind2  = mat_b->colind + mat_b->colptr[i2];
+      val1  = mat_a->colval + mat_a->colptr[i1];
+      val2  = mat_b->colval + mat_b->colptr[i2];
+      break;
+
+    default:
+      gk_errexit(SIGERR, "Invalid index type of %d.\n", what);
+      return 0.0;
+  }
+
+  /* i1/i2 now act as cursors of a sorted-list merge that runs until BOTH lists are exhausted */
+  switch (simtype) {
+    case GK_CSR_COS:
+    case GK_CSR_JAC:
+      sim = stat1 = stat2 = 0.0;
+      i1 = i2 = 0;
+      while (i1<nind1 || i2<nind2) {  /* '||' so the unmatched tails still reach stat1/stat2 */
+        if (i1 == nind1) {
+          stat2 += val2[i2]*val2[i2];
+          i2++;
+        }
+        else if (i2 == nind2) {
+          stat1 += val1[i1]*val1[i1];
+          i1++;
+        }
+        else if (ind1[i1] < ind2[i2]) {
+          stat1 += val1[i1]*val1[i1];
+          i1++;
+        }
+        else if (ind1[i1] > ind2[i2]) {
+          stat2 += val2[i2]*val2[i2];
+          i2++;
+        }
+        else {
+          sim   += val1[i1]*val2[i2];
+          stat1 += val1[i1]*val1[i1];
+          stat2 += val2[i2]*val2[i2];
+          i1++;
+          i2++;
+        }
+      }
+      if (simtype == GK_CSR_COS)
+        sim = (stat1*stat2 > 0.0 ? sim/sqrt(stat1*stat2) : 0.0);
+      else 
+        sim = (stat1+stat2-sim > 0.0 ? sim/(stat1+stat2-sim) : 0.0);
+      break;
+
+    case GK_CSR_MIN:
+      sim = stat1 = stat2 = 0.0;
+      i1 = i2 = 0;
+      while (i1<nind1 || i2<nind2) {
+        if (i1 == nind1) {
+          stat2 += val2[i2];
+          i2++;
+        }
+        else if (i2 == nind2) {
+          stat1 += val1[i1];
+          i1++;
+        }
+        else if (ind1[i1] < ind2[i2]) {
+          stat1 += val1[i1];
+          i1++;
+        }
+        else if (ind1[i1] > ind2[i2]) {
+          stat2 += val2[i2];
+          i2++;
+        }
+        else {
+          sim   += gk_min(val1[i1],val2[i2]);
+          stat1 += val1[i1];
+          stat2 += val2[i2];
+          i1++;
+          i2++;
+        }
+      }
+      sim = (stat1+stat2-sim > 0.0 ? sim/(stat1+stat2-sim) : 0.0);
+
+      break;
+
+    case GK_CSR_AMIN:
+      sim = stat1 = stat2 = 0.0;
+      i1 = i2 = 0;
+      while (i1<nind1 || i2<nind2) {
+        if (i1 == nind1) {
+          stat2 += val2[i2];
+          i2++;
+        }
+        else if (i2 == nind2) {
+          stat1 += val1[i1];
+          i1++;
+        }
+        else if (ind1[i1] < ind2[i2]) {
+          stat1 += val1[i1];
+          i1++;
+        }
+        else if (ind1[i1] > ind2[i2]) {
+          stat2 += val2[i2];
+          i2++;
+        }
+        else {
+          sim   += gk_min(val1[i1],val2[i2]);
+          stat1 += val1[i1];
+          stat2 += val2[i2];
+          i1++;
+          i2++;
+        }
+      }
+      sim = (stat1 > 0.0 ? sim/stat1 : 0.0);
+
+      break;
+
+    default:
+      gk_errexit(SIGERR, "Unknown similarity measure %d\n", simtype);
+      return -1;
+  }
+
+  return sim;
+
+}
+
+/*************************************************************************/
+/*! Finds the n most similar rows (neighbors) to the query.
+
+    \param mat the matrix itself; its column-based view must be present
+    \param nqterms is the number of columns in the query
+    \param qind is the list of query columns
+    \param qval is the list of corresponding query weights
+    \param simtype is the type of similarity and is one of GK_CSR_DOTP,
+           GK_CSR_COS, GK_CSR_JAC, GK_CSR_MIN, GK_CSR_AMIN. In case of 
+           GK_CSR_COS, the rows and the query are assumed to be of unit 
+           length. GK_CSR_JAC needs mat->rnorms and GK_CSR_MIN needs mat->rsums.
+    \param nsim is the maximum number of requested most similar rows.
+           If -1 is provided, then everything is returned unsorted.
+    \param minsim is the minimum similarity of the requested most 
+           similar rows
+    \param hits is the result set. This array should be at least
+           of length nsim (up to mat->nrows entries when nsim is -1).
+    \param i_marker is an array of size equal to the number of rows
+           whose values are initialized to -1. If NULL is provided
+           then this array is allocated and freed internally.
+    \param i_cand is an array of size equal to the number of rows.
+           If NULL is provided then this array is allocated and freed 
+           internally.
+    \returns The number of identified most similar rows, which can be
+             smaller than the requested nsim in those cases in which there
+             are not sufficiently many neighbors; -1 for an unknown simtype.
+*/
+/**************************************************************************/
+int gk_csr_GetSimilarRows(gk_csr_t *mat, int nqterms, int *qind, 
+        float *qval, int simtype, int nsim, float minsim, gk_fkv_t *hits, 
+        int *i_marker, gk_fkv_t *i_cand)
+{
+  ssize_t i, ii, j, k;
+  int nrows, ncols, ncand;
+  ssize_t *colptr;
+  int *colind, *marker;
+  float *colval, *rnorms, mynorm, *rsums, mysum;
+  gk_fkv_t *cand;
+
+  if (nqterms == 0)
+    return 0;
+
+  nrows  = mat->nrows;
+  ncols  = mat->ncols;
+  GKASSERT((colptr = mat->colptr) != NULL);
+  GKASSERT((colind = mat->colind) != NULL);
+  GKASSERT((colval = mat->colval) != NULL);
+
+  marker = (i_marker ? i_marker : gk_ismalloc(nrows, -1, "gk_csr_SimilarRows: marker"));
+  cand   = (i_cand   ? i_cand   : gk_fkvmalloc(nrows, "gk_csr_SimilarRows: cand"));
+
+  switch (simtype) {
+    case GK_CSR_DOTP:
+    case GK_CSR_COS:
+      for (ncand=0, ii=0; ii<nqterms; ii++) {
+        i = qind[ii];
+        if (i < ncols) {
+          for (j=colptr[i]; j<colptr[i+1]; j++) {
+            k = colind[j];
+            if (marker[k] == -1) {
+              cand[ncand].val = k;
+              cand[ncand].key = 0;
+              marker[k]       = ncand++;
+            }
+            cand[marker[k]].key += colval[j]*qval[ii];
+          }
+        }
+      }
+      break;
+
+    case GK_CSR_JAC:
+      for (ncand=0, ii=0; ii<nqterms; ii++) {
+        i = qind[ii];
+        if (i < ncols) {
+          for (j=colptr[i]; j<colptr[i+1]; j++) {
+            k = colind[j];
+            if (marker[k] == -1) {
+              cand[ncand].val = k;
+              cand[ncand].key = 0;
+              marker[k]       = ncand++;
+            }
+            cand[marker[k]].key += colval[j]*qval[ii];
+          }
+        }
+      }
+
+      GKASSERT((rnorms = mat->rnorms) != NULL);
+      mynorm = gk_fdot(nqterms, qval, 1, qval, 1);
+
+      for (i=0; i<ncand; i++)
+        cand[i].key = cand[i].key/(rnorms[cand[i].val]+mynorm-cand[i].key);
+      break;
+
+    case GK_CSR_MIN:
+      for (ncand=0, ii=0; ii<nqterms; ii++) {
+        i = qind[ii];
+        if (i < ncols) {
+          for (j=colptr[i]; j<colptr[i+1]; j++) {
+            k = colind[j];
+            if (marker[k] == -1) {
+              cand[ncand].val = k;
+              cand[ncand].key = 0;
+              marker[k]       = ncand++;
+            }
+            cand[marker[k]].key += gk_min(colval[j], qval[ii]);
+          }
+        }
+      }
+
+      GKASSERT((rsums = mat->rsums) != NULL);
+      mysum = gk_fsum(nqterms, qval, 1);
+
+      for (i=0; i<ncand; i++)
+        cand[i].key = cand[i].key/(rsums[cand[i].val]+mysum-cand[i].key);
+      break;
+
+    /* Asymmetric MIN similarity: normalized by the query sum only */
+    case GK_CSR_AMIN:
+      for (ncand=0, ii=0; ii<nqterms; ii++) {
+        i = qind[ii];
+        if (i < ncols) {
+          for (j=colptr[i]; j<colptr[i+1]; j++) {
+            k = colind[j];
+            if (marker[k] == -1) {
+              cand[ncand].val = k;
+              cand[ncand].key = 0;
+              marker[k]       = ncand++;
+            }
+            cand[marker[k]].key += gk_min(colval[j], qval[ii]);
+          }
+        }
+      }
+
+      mysum = gk_fsum(nqterms, qval, 1);
+
+      for (i=0; i<ncand; i++)
+        cand[i].key = cand[i].key/mysum;
+      break;
+
+    default:
+      gk_errexit(SIGERR, "Unknown similarity measure %d\n", simtype);
+      /* reached only if gk_errexit() returns: release the arrays allocated above */
+      if (i_marker == NULL) gk_free((void **)&marker, LTERM);
+      if (i_cand == NULL)   gk_free((void **)&cand, LTERM);
+      return -1;
+  }
+
+  /* prune the hits that are below minsim and reset the marker entries that were used */
+  for (j=0, i=0; i<ncand; i++) {
+    marker[cand[i].val] = -1;
+    if (cand[i].key >= minsim) 
+      cand[j++] = cand[i];
+  }
+  ncand = j;
+
+  if (nsim == -1 || nsim >= ncand) {
+    nsim = ncand;
+  }
+  else {  /* nsim < ncand here: select nsim of the candidates and sort them */
+    gk_dfkvkselect(ncand, nsim, cand);
+    gk_fkvsortd(nsim, cand);
+  }
+
+  gk_fkvcopy(nsim, cand, hits);
+  if (i_marker == NULL)
+    gk_free((void **)&marker, LTERM);
+  if (i_cand == NULL)
+    gk_free((void **)&cand, LTERM);
+  return nsim;
+}
+
+
+/*************************************************************************/
+/*! Returns a symmetric version of a square matrix. The symmetric version
+    is constructed by applying an A op A^T operation, where op is one of
+    GK_CSR_SYM_SUM, GK_CSR_SYM_MIN, GK_CSR_SYM_MAX, GK_CSR_SYM_AVG.
+    For GK_CSR_SYM_AVG an entry becomes 0.5*(a_ij + a_ji), a missing entry counting as 0.
+    \param mat the matrix to be symmetrized,
+    \param op indicates the operation to be performed. The possible values are
+           GK_CSR_SYM_SUM, GK_CSR_SYM_MIN, GK_CSR_SYM_MAX, and GK_CSR_SYM_AVG.
+
+    \returns the symmetrized matrix consisting only of its row-based structure,
+          or NULL if mat is not square. The input matrix is not modified. 
+*/
+/**************************************************************************/
+gk_csr_t *gk_csr_MakeSymmetric(gk_csr_t *mat, int op)
+{
+  ssize_t i, j, k, nnz;
+  int nrows, nadj, hasvals;
+  ssize_t *rowptr, *colptr, *nrowptr;
+  int *rowind, *colind, *nrowind, *marker, *ids;
+  float *rowval=NULL, *colval=NULL, *nrowval=NULL, *wgts=NULL;
+  gk_csr_t *nmat;
+
+  if (mat->nrows != mat->ncols) {
+    fprintf(stderr, "gk_csr_MakeSymmetric: The matrix needs to be square.\n");
+    return NULL;
+  }
+
+  hasvals = (mat->rowval != NULL);
+
+  nrows  = mat->nrows;
+  rowptr = mat->rowptr;
+  rowind = mat->rowind;
+  if (hasvals)
+    rowval = mat->rowval;
+
+  /* create the column view for efficient processing */
+  colptr = gk_zsmalloc(nrows+1, 0, "colptr");
+  colind = gk_i32malloc(rowptr[nrows], "colind");
+  if (hasvals)
+    colval = gk_fmalloc(rowptr[nrows], "colval");
+
+  for (i=0; i<nrows; i++) {
+    for (j=rowptr[i]; j<rowptr[i+1]; j++) 
+      colptr[rowind[j]]++;
+  }
+  MAKECSR(i, nrows, colptr);
+
+  for (i=0; i<nrows; i++) {
+    for (j=rowptr[i]; j<rowptr[i+1]; j++) {
+      colind[colptr[rowind[j]]] = i;
+      if (hasvals)
+        colval[colptr[rowind[j]]] = rowval[j];
+      colptr[rowind[j]]++;
+    }
+  }
+  SHIFTCSR(i, nrows, colptr);
+
+
+  nmat = gk_csr_Create();
+  
+  nmat->nrows = mat->nrows;
+  nmat->ncols = mat->ncols;
+
+  /* the result holds at most one entry per out-edge plus one per in-edge */
+  nrowptr = nmat->rowptr = gk_zmalloc(nrows+1, "gk_csr_MakeSymmetric: nrowptr");
+  nrowind = nmat->rowind = gk_imalloc(2*rowptr[nrows], "gk_csr_MakeSymmetric: nrowind");
+  if (hasvals)
+    nrowval = nmat->rowval = gk_fmalloc(2*rowptr[nrows], "gk_csr_MakeSymmetric: nrowval");
+
+  marker = gk_ismalloc(nrows, -1, "marker");
+  ids    = gk_imalloc(nrows, "ids");
+  if (hasvals)
+    wgts = gk_fmalloc(nrows, "wgts");
+
+  nrowptr[0] = nnz = 0;
+  for (i=0; i<nrows; i++) {
+    nadj = 0;
+    /* out-edges: marker[] remembers the slot of each column within ids[]/wgts[] */
+    for (j=rowptr[i]; j<rowptr[i+1]; j++) {
+      ids[nadj] = rowind[j]; 
+      if (hasvals)
+        wgts[nadj] = (op == GK_CSR_SYM_AVG ? 0.5*rowval[j] : rowval[j]);
+      marker[rowind[j]] = nadj++;
+    }
+
+    /* in-edges */
+    for (j=colptr[i]; j<colptr[i+1]; j++) {
+      if (marker[colind[j]] == -1) {
+        if (op != GK_CSR_SYM_MIN) {
+          ids[nadj] = colind[j]; 
+          if (hasvals) 
+            wgts[nadj] = (op == GK_CSR_SYM_AVG ? 0.5*colval[j] : colval[j]);
+          nadj++;
+        }
+      }
+      else {
+        if (hasvals) {
+          switch (op) {
+            case GK_CSR_SYM_MAX:
+              wgts[marker[colind[j]]] = gk_max(colval[j], wgts[marker[colind[j]]]);
+              break;
+            case GK_CSR_SYM_MIN:
+              wgts[marker[colind[j]]] = gk_min(colval[j], wgts[marker[colind[j]]]);
+              break;
+            case GK_CSR_SYM_SUM:
+              wgts[marker[colind[j]]] += colval[j];
+              break;
+            case GK_CSR_SYM_AVG:
+              wgts[marker[colind[j]]] += 0.5*colval[j];  /* the out-edge half is already stored */
+              break;
+            default:
+              errexit("Unsupported op for MakeSymmetric!\n");
+          }
+        }
+        marker[colind[j]] = -1;
+      }
+    }
+
+    /* go over out edges again to resolve any edges that were not found in the in
+     * edges */
+    for (j=rowptr[i]; j<rowptr[i+1]; j++) {
+      if (marker[rowind[j]] != -1) {
+        if (op == GK_CSR_SYM_MIN)
+          ids[marker[rowind[j]]] = -1;
+        marker[rowind[j]] = -1;
+      }
+    }
+
+    /* put the non '-1' entries in ids[] into i's row */
+    for (j=0; j<nadj; j++) {
+      if (ids[j] != -1) {
+        nrowind[nnz] = ids[j];
+        if (hasvals)
+          nrowval[nnz] = wgts[j];
+        nnz++;
+      }
+    }
+    nrowptr[i+1] = nnz;
+  }
+
+  gk_free((void **)&colptr, &colind, &colval, &marker, &ids, &wgts, LTERM);
+
+  return nmat;
+}
+
+
+/*************************************************************************/
+/*! This function finds the connected components in a graph stored in
+    CSR format.
+
+    \param mat is the graph structure in CSR format
+    \param cptr is the ptr structure of the CSR representation of the 
+           components. The length of this vector must be mat->nrows+1.
+    \param cind is the indices structure of the CSR representation of 
+           the components. The length of this vector must be mat->nrows.
+    \param cids is an array that stores the component # of each vertex
+           of the graph. The length of this vector must be mat->nrows.
+
+    \returns the number of components that it found, or -1 if mat is not square.
+
+    \note Each of cptr, cind, and cids can be NULL independently; a NULL
+          output is not returned (scratch space is used for cptr/cind).
+*/
+/*************************************************************************/
+int gk_csr_FindConnectedComponents(gk_csr_t *mat, int32_t *cptr, int32_t *cind, 
+        int32_t *cids)
+{
+  ssize_t i, j, k, nvtxs, first, last, ntodo, ncmps;
+  ssize_t *xadj;
+  int32_t *adjncy, *pos, *todo;
+  int32_t *own_cptr=NULL, *own_cind=NULL;  /* scratch arrays allocated (and freed) here */
+
+  if (mat->nrows != mat->ncols) {
+    fprintf(stderr, "gk_csr_FindComponents: The matrix needs to be square.\n");
+    return -1;
+  }
+
+  nvtxs  = mat->nrows;
+  xadj   = mat->rowptr;
+  adjncy = mat->rowind;
+
+  /* Deal with NULL supplied cptr/cind vectors. Each one is replaced on its own,
+     so a caller may pass either of them without the other. */
+  if (cptr == NULL)
+    cptr = own_cptr = gk_i32malloc(nvtxs+1, "gk_csr_FindComponents: cptr");
+  if (cind == NULL)
+    cind = own_cind = gk_i32malloc(nvtxs, "gk_csr_FindComponents: cind");
+
+  /* The list of vertices that have not been touched yet. 
+     The valid entries are from [0..ntodo). */
+  todo = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "gk_csr_FindComponents: todo"));
+
+  /* For a vertex that has not been visited, pos[i] is the position in the
+     todo list that this vertex is stored. 
+     If a vertex has been visited, pos[i] = -1. */
+  pos = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "gk_csr_FindComponents: pos"));
+
+
+  /* Find the connected components */
+  ncmps = -1;
+  ntodo = nvtxs;     /* All vertices have not been visited */
+  first = last = 0;  /* Point to the first and last vertices that have been touched
+                        but not explored. 
+                        These vertices are stored in cind[first]...cind[last-1]. */
+
+  while (first < last || ntodo > 0) {
+    if (first == last) { /* Find another starting vertex */
+      cptr[++ncmps] = first;  /* Mark the end of the current CC */
+
+      /* put the first vertex in the todo list as the start of the new CC */
+      ASSERT(pos[todo[0]] != -1);
+      k = todo[0];
+      cind[last++] = k;
+      todo[0] = todo[--ntodo];  /* refill slot 0 with the last todo entry */
+      pos[todo[0]] = 0;
+      pos[k] = -1;  /* mark visited LAST: when ntodo reaches 0, todo[0] is still k */
+    }
+
+    i = cind[first++];  /* Get the first visited but unexplored vertex */
+
+    for (j=xadj[i]; j<xadj[i+1]; j++) {
+      k = adjncy[j];
+      if (pos[k] != -1) {
+        cind[last++] = k;
+
+        /* Remove k from the todo list and put the last item in the todo 
+           list at the position that k was so that the todo list will be
+           consecutive. The pos[] array is updated accordingly to keep track
+           the location of the vertices in the todo[] list. */
+        todo[pos[k]] = todo[--ntodo];
+        pos[todo[pos[k]]] = pos[k];
+        pos[k] = -1;
+      }
+    }
+  }
+  cptr[++ncmps] = first;
+
+  /* see if we need to return cids */
+  if (cids != NULL) {
+    for (i=0; i<ncmps; i++) {
+      for (j=cptr[i]; j<cptr[i+1]; j++)
+        cids[cind[j]] = i;
+    }
+  }
+
+  /* free only the scratch arrays; a NULL entry is passed to gk_free() as elsewhere in this file */
+  gk_free((void **)&own_cptr, &own_cind, LTERM);
+
+  gk_free((void **)&pos, &todo, LTERM);
+
+  return (int) ncmps;
+}
+
+
+/*************************************************************************/
+/*! Returns a matrix that has been reordered according to the provided
+    row/column permutation. The matrix is required to be square and the same
+    permutation is applied to both rows and columns.
+
+    \param[IN] mat is the matrix to be re-ordered.
+    \param[IN] perm is the new ordering of the rows & columns
+    \param[IN] iperm is the original ordering of the re-ordered matrix's rows & columns
+    \returns the newly created reordered matrix, or NULL if mat is not square
+             or both perm and iperm are NULL.
+    \note Either perm or iperm can be NULL but not both.
+*/
+/**************************************************************************/
+gk_csr_t *gk_csr_ReorderSymmetric(gk_csr_t *mat, int32_t *perm, int32_t *iperm)
+{
+  ssize_t j, jj;
+  ssize_t *rowptr, *nrowptr;
+  int i, u, v, nrows;
+  int freeperm=0, freeiperm=0;
+  int32_t *rowind, *nrowind;
+  float *rowval, *nrowval;
+  gk_csr_t *nmat;
+
+  if (mat->nrows != mat->ncols) {
+    fprintf(stderr, "gk_csr_ReorderSymmetric: The matrix needs to be square.\n");
+    return NULL;
+  }
+
+  if (perm == NULL && iperm == NULL)
+    return NULL;
+
+  nrows  = mat->nrows;
+  rowptr = mat->rowptr;
+  rowind = mat->rowind;
+  rowval = mat->rowval;
+
+  nmat = gk_csr_Create();
+
+  nmat->nrows = nrows;
+  nmat->ncols = nrows;
+
+  nrowptr = nmat->rowptr = gk_zmalloc(nrows+1, "gk_csr_ReorderSymmetric: rowptr");
+  nrowind = nmat->rowind = gk_i32malloc(rowptr[nrows], "gk_csr_ReorderSymmetric: rowind");
+  nrowval = nmat->rowval = (rowval ? gk_fmalloc(rowptr[nrows], "gk_csr_ReorderSymmetric: rowval") : NULL); /* values are optional */
+
+  /* allocate memory for the different structures present in the matrix */
+  if (mat->rlabels)
+    nmat->rlabels = gk_i32malloc(nrows, "gk_csr_ReorderSymmetric: rlabels");
+  if (mat->rmap)
+    nmat->rmap = gk_i32malloc(nrows, "gk_csr_ReorderSymmetric: rmap");
+  if (mat->rnorms)
+    nmat->rnorms = gk_fmalloc(nrows, "gk_csr_ReorderSymmetric: rnorms");
+  if (mat->rsums)
+    nmat->rsums = gk_fmalloc(nrows, "gk_csr_ReorderSymmetric: rsums");
+  if (mat->rsizes)
+    nmat->rsizes = gk_fmalloc(nrows, "gk_csr_ReorderSymmetric: rsizes");
+  if (mat->rvols)
+    nmat->rvols = gk_fmalloc(nrows, "gk_csr_ReorderSymmetric: rvols");
+  if (mat->rwgts)
+    nmat->rwgts = gk_fmalloc(nrows, "gk_csr_ReorderSymmetric: rwgts");
+
+  if (mat->clabels)
+    nmat->clabels = gk_i32malloc(nrows, "gk_csr_ReorderSymmetric: clabels");
+  if (mat->cmap)
+    nmat->cmap = gk_i32malloc(nrows, "gk_csr_ReorderSymmetric: cmap");
+  if (mat->cnorms)
+    nmat->cnorms = gk_fmalloc(nrows, "gk_csr_ReorderSymmetric: cnorms");
+  if (mat->csums)
+    nmat->csums = gk_fmalloc(nrows, "gk_csr_ReorderSymmetric: csums");
+  if (mat->csizes)
+    nmat->csizes = gk_fmalloc(nrows, "gk_csr_ReorderSymmetric: csizes");
+  if (mat->cvols)
+    nmat->cvols = gk_fmalloc(nrows, "gk_csr_ReorderSymmetric: cvols");
+  if (mat->cwgts)
+    nmat->cwgts = gk_fmalloc(nrows, "gk_csr_ReorderSymmetric: cwgts");
+
+
+  /* create perm/iperm if not provided */
+  if (perm == NULL) {
+    freeperm = 1;
+    perm = gk_i32malloc(nrows, "gk_csr_ReorderSymmetric: perm"); 
+    for (i=0; i<nrows; i++)
+      perm[iperm[i]] = i;
+  }
+  if (iperm == NULL) {
+    freeiperm = 1;
+    iperm = gk_i32malloc(nrows, "gk_csr_ReorderSymmetric: iperm"); 
+    for (i=0; i<nrows; i++)
+      iperm[perm[i]] = i;
+  }
+
+  /* fill-in the information of the re-ordered matrix: new row v is old row u */
+  nrowptr[0] = jj = 0;
+  for (v=0; v<nrows; v++) {
+    u = iperm[v];
+    for (j=rowptr[u]; j<rowptr[u+1]; j++, jj++) {
+      nrowind[jj] = perm[rowind[j]];
+      if (rowval)
+        nrowval[jj] = rowval[j];
+    }
+
+    if (mat->rlabels)
+      nmat->rlabels[v] = mat->rlabels[u];
+    if (mat->rmap)
+      nmat->rmap[v] = mat->rmap[u];
+    if (mat->rnorms)
+      nmat->rnorms[v] = mat->rnorms[u];
+    if (mat->rsums)
+      nmat->rsums[v] = mat->rsums[u];
+    if (mat->rsizes)
+      nmat->rsizes[v] = mat->rsizes[u];
+    if (mat->rvols)
+      nmat->rvols[v] = mat->rvols[u];
+    if (mat->rwgts)
+      nmat->rwgts[v] = mat->rwgts[u];
+
+    if (mat->clabels)
+      nmat->clabels[v] = mat->clabels[u];
+    if (mat->cmap)
+      nmat->cmap[v] = mat->cmap[u];
+    if (mat->cnorms)
+      nmat->cnorms[v] = mat->cnorms[u];
+    if (mat->csums)
+      nmat->csums[v] = mat->csums[u];
+    if (mat->csizes)
+      nmat->csizes[v] = mat->csizes[u];
+    if (mat->cvols)
+      nmat->cvols[v] = mat->cvols[u];
+    if (mat->cwgts)
+      nmat->cwgts[v] = mat->cwgts[u];
+
+    nrowptr[v+1] = jj;
+  }
+
+
+  /* free memory */
+  if (freeperm)
+    gk_free((void **)&perm, LTERM);
+  if (freeiperm)
+    gk_free((void **)&iperm, LTERM);
+
+  return nmat;
+}
+
+
+/*************************************************************************/
+/*! This function computes a permutation of the rows/columns of a symmetric
+    matrix based on a breadth-first-traversal. It can be used for re-ordering 
+    the matrix to reduce its bandwidth for better cache locality.
+
+    \param[IN]  mat is the matrix whose ordering to be computed.
+    \param[IN]  maxdegree is the degree limit: only rows with fewer than
+                maxdegree nonzeros participate in the BFS ordering. All other
+                rows will be put at the front of the ordering in decreasing
+                degree order. 
+    \param[IN]  v is the starting row of the BFS. A value of -1 indicates that
+                a randomly selected row is used; must be -1 if maxdegree < nrows.
+    \param[OUT] perm[i] stores the ID of row i in the re-ordered matrix.
+    \param[OUT] iperm[i] stores the ID of the row that corresponds to 
+                the ith vertex in the re-ordered matrix.
+
+    \note The perm or iperm (but not both) can be NULL, at which point, 
+          the corresponding arrays are not returned. Though the program
+          works fine when both are NULL, doing that is not smart.
+          The returned arrays should be freed with gk_free().
+*/
+/*************************************************************************/
+void gk_csr_ComputeBFSOrderingSymmetric(gk_csr_t *mat, int maxdegree, int v, 
+          int32_t **r_perm, int32_t **r_iperm)
+{
+  int i, k, nrows, first, last;
+  ssize_t j, *rowptr;
+  int32_t *rowind, *cot, *pos;
+
+  if (mat->nrows != mat->ncols) {
+    fprintf(stderr, "gk_csr_ComputeBFSOrderingSymmetric: The matrix needs to be square.\n");
+    return;
+  }
+  if (maxdegree < mat->nrows && v != -1) {
+    fprintf(stderr, "gk_csr_ComputeBFSOrderingSymmetric: Since maxdegree node renumbering is requested the starting row should be -1.\n");
+    return;
+  }
+  if (mat->nrows <= 0)
+    return;
+
+  nrows  = mat->nrows;
+  rowptr = mat->rowptr;
+  rowind = mat->rowind;
+
+  /* This array will function like pos + touched of the CC method */
+  pos = gk_i32incset(nrows, 0, gk_i32malloc(nrows, "gk_csr_ComputeBFSOrderingSymmetric: pos"));
+
+  /* This array ([C]losed[O]pen[T]odo => cot) serves three purposes. 
+     Positions from [0...first) is the current iperm[] vector of the explored rows; 
+     Positions from [first...last) is the OPEN list (i.e., visited rows);
+     Positions from [last...nrows) is the todo list. */
+  cot = gk_i32incset(nrows, 0, gk_i32malloc(nrows, "gk_csr_ComputeBFSOrderingSymmetric: cot"));
+
+  first = last = 0;
+  /* deal with maxdegree handling */
+  if (maxdegree < nrows) {
+    last = nrows;
+    for (i=nrows-1; i>=0; i--) {
+      if (rowptr[i+1]-rowptr[i] < maxdegree) {
+        cot[--last] = i;
+        pos[i] = last;
+      }
+      else {
+        cot[first++] = i;
+        pos[i] = -1;
+      }
+    }
+    GKASSERT(first == last);
+
+    if (last > 0) { /* reorder them in degree decreasing order */
+      gk_ikv_t *cand = gk_ikvmalloc(first, "gk_csr_ComputeBFSOrderingSymmetric: cand");
+
+      for (i=0; i<first; i++) {
+        k = cot[i];
+        cand[i].key = (int)(rowptr[k+1]-rowptr[k]);
+        cand[i].val = k;
+      }
+
+      gk_ikvsortd(first, cand);
+      for (i=0; i<first; i++) 
+        cot[i] = cand[i].val;
+
+      gk_free((void **)&cand, LTERM);
+    }
+
+    if (last < nrows)  /* the todo list is empty when every row was placed at the front */
+      v = cot[last + RandomInRange(nrows-last)];
+  }
+  else if (v == -1)  /* -1 requests a random start; it must never be used as an index */
+    v = RandomInRange(nrows);
+
+  /* swap v with the front of the todo list (nothing to swap if the todo list is empty) */
+  if (last < nrows) {
+    cot[pos[v]] = cot[last];
+    pos[cot[last]] = pos[v];
+    cot[last] = v;
+    pos[v] = last;
+  }
+
+  /* start processing the nodes */
+  while (first < nrows) {
+    if (first == last) { /* find another starting row */
+      k = cot[last];
+      GKASSERT(pos[k] != -1);
+      pos[k] = -1; /* mark node as being visited */
+      last++;
+    }
+
+    i = cot[first++];  /* the ++ advances the explored rows */
+    for (j=rowptr[i]; j<rowptr[i+1]; j++) {
+      k = rowind[j];
+      /* if a node has already been visited, its perm[] will be -1 */
+      if (pos[k] != -1) {
+        /* pos[k] is the location within iperm of where k resides (it is in the 'todo' part); 
+           It is placed in that location cot[last] (end of OPEN list) that we 
+           are about to overwrite and update pos[cot[last]] to reflect that. */
+        cot[pos[k]]    = cot[last]; /* put the head of the todo list to 
+                                       where k was in the todo list */
+        pos[cot[last]] = pos[k];    /* update perm to reflect the move */
+
+        cot[last++] = k;  /* put node at the end of the OPEN list */
+        pos[k]      = -1; /* mark node as being visited */
+      }
+    }
+  }
+
+  /* time to decide what to return */
+  if (r_perm != NULL) {
+    /* use the 'pos' array to build the perm array */
+    for (i=0; i<nrows; i++)
+      pos[cot[i]] = i;
+
+    *r_perm = pos;
+    pos = NULL;
+  }
+
+  if (r_iperm != NULL) {
+    *r_iperm = cot;
+    cot = NULL;
+  }
+
+  /* cleanup memory */
+  gk_free((void **)&pos, &cot, LTERM);
+}
+
+
+/*************************************************************************/
+/*! This function computes a permutation of the rows of a symmetric matrix
+    based on a best-first-traversal. It can be used for re-ordering the matrix
+    to reduce its bandwidth for better cache locality.
+
+    \param[IN]  mat is the matrix structure.
+    \param[IN]  v is the starting row of the best-first traversal.
+    \param[IN]  type indicates the criteria to use to measure the 'bestness'
+                of a row; the values 1-6 are described at the switch(type) below.
+    \param[OUT] perm[i] stores the ID of row i in the re-ordered matrix.
+    \param[OUT] iperm[i] stores the ID of the row that corresponds to 
+                the ith row in the re-ordered matrix.
+
+    \note The perm or iperm (but not both) can be NULL, at which point, 
+          the corresponding arrays are not returned. Though the program
+          works fine when both are NULL, doing that is not smart.
+          The returned arrays should be freed with gk_free().
+*/
+/*************************************************************************/
+void gk_csr_ComputeBestFOrderingSymmetric(gk_csr_t *mat, int v, int type, 
+          int32_t **r_perm, int32_t **r_iperm)
+{
+  ssize_t j, jj, *rowptr;
+  int i, k, u, nrows, nopen, ntodo;
+  int32_t *rowind, *perm, *degrees, *wdegrees, *sod, *level, *ot, *pos;
+  gk_i32pq_t *queue;
+
+  if (mat->nrows != mat->ncols) {
+    fprintf(stderr, "gk_csr_ComputeBestFOrderingSymmetric: The matrix needs to be square.\n");
+    return;
+  }
+  if (mat->nrows <= 0)
+    return;
+
+  nrows  = mat->nrows;
+  rowptr = mat->rowptr;
+  rowind = mat->rowind;
+
+
+  /* the degree of the vertices in the closed list */
+  degrees = gk_i32smalloc(nrows, 0, "gk_csr_ComputeBestFOrderingSymmetric: degrees");
+
+  /* the weighted degree of the vertices in the closed list for type==3 */
+  wdegrees = gk_i32smalloc(nrows, 0, "gk_csr_ComputeBestFOrderingSymmetric: wdegrees");
+
+  /* the sum of differences for type==4 */
+  sod = gk_i32smalloc(nrows, 0, "gk_csr_ComputeBestFOrderingSymmetric: sod");
+
+  /* the encountering level of a vertex type==5 */
+  level = gk_i32smalloc(nrows, 0, "gk_csr_ComputeBestFOrderingSymmetric: level");
+
+  /* The open+todo list of vertices. 
+     The vertices from [0..nopen) are the open vertices.
+     The vertices from [nopen..ntodo) are the todo vertices.
+     */
+  ot = gk_i32incset(nrows, 0, gk_i32malloc(nrows, "gk_csr_ComputeBestFOrderingSymmetric: ot"));
+
+  /* For a vertex that has not been explored, pos[i] is the position in the ot list. */
+  pos = gk_i32incset(nrows, 0, gk_i32malloc(nrows, "gk_csr_ComputeBestFOrderingSymmetric: pos"));
+
+  /* if perm[i] >= 0, then perm[i] is the order of vertex i; otherwise perm[i] == -1. */
+  perm = gk_i32smalloc(nrows, -1, "gk_csr_ComputeBestFOrderingSymmetric: perm");
+
+  /* create the queue and put the starting vertex in it */
+  queue = gk_i32pqCreate(nrows);
+  gk_i32pqInsert(queue, v, 1);
+
+  /* put v at the front of the open list (i.e., swap v with vertex 0 in ot[]/pos[]) */
+  pos[0] = ot[0] = v;
+  pos[v] = ot[v] = 0;
+  nopen = 1;
+  ntodo = nrows;
+
+  /* start processing the nodes; iteration i assigns order i to one vertex */
+  for (i=0; i<nrows; i++) {
+    if (nopen == 0) { /* deal with non-connected graphs */
+      gk_i32pqInsert(queue, ot[0], 1);  
+      nopen++;
+    }
+
+    if ((v = gk_i32pqGetTop(queue)) == -1)
+      gk_errexit(SIGERR, "The priority queue got empty ahead of time [i=%d].\n", i);
+
+    if (perm[v] != -1)
+      gk_errexit(SIGERR, "The perm[%d] has already been set.\n", v);
+    perm[v] = i;
+
+    if (ot[pos[v]] != v)
+      gk_errexit(SIGERR, "Something went wrong [ot[pos[%d]]!=%d.\n", v, v);
+    if (pos[v] >= nopen)
+      gk_errexit(SIGERR, "The position of v is not in open list. pos[%d]=%d is >=%d.\n", v, pos[v], nopen);
+
+    /* remove v from the open list and re-arrange the todo part of the list */
+    ot[pos[v]]       = ot[nopen-1];
+    pos[ot[nopen-1]] = pos[v];
+    if (ntodo > nopen) {
+      ot[nopen-1]      = ot[ntodo-1];
+      pos[ot[ntodo-1]] = nopen-1;
+    }
+    nopen--;
+    ntodo--;
+
+    for (j=rowptr[v]; j<rowptr[v+1]; j++) {
+      u = rowind[j];
+      if (perm[u] == -1) {
+        /* update ot list, if u is not in the open list by putting it at the end
+           of the open list. */
+        if (degrees[u] == 0) {
+          ot[pos[u]]     = ot[nopen];
+          pos[ot[nopen]] = pos[u];
+          ot[nopen]      = u;
+          pos[u]         = nopen;
+          nopen++;
+
+          level[u] = level[v]+1;
+          gk_i32pqInsert(queue, u, 0);  
+        }
+
+
+        /* update the in-closed degree */
+        degrees[u]++;
+
+        /* update the queues based on the type */
+        switch (type) {
+          case 1: /* DFS */
+            gk_i32pqUpdate(queue, u, 1000*(i+1)+degrees[u]);
+            break;
+
+          case 2: /* Max in closed degree */
+            gk_i32pqUpdate(queue, u, degrees[u]);
+            break;
+
+          case 3: /* Sum of orders in closed list */
+            wdegrees[u] += i;
+            gk_i32pqUpdate(queue, u, wdegrees[u]);
+            break;
+
+          case 4: /* Sum of order-differences */
+            /* this is handled at the end of the loop */
+            ;
+            break;
+
+          case 5: /* BFS with in degree priority */
+            gk_i32pqUpdate(queue, u, -(1000*level[u] - degrees[u]));
+            break;
+
+          case 6: /* Hybrid of 1+2 */
+            gk_i32pqUpdate(queue, u, (i+1)*degrees[u]);
+            break;
+
+          default:
+            ;
+        }
+      }
+    }
+
+    if (type == 4) { /* update all the vertices in the open list */
+      for (j=0; j<nopen; j++) {
+        u = ot[j];
+        if (perm[u] != -1)
+          gk_errexit(SIGERR, "For i=%d, the open list contains a closed row: ot[%zd]=%d, perm[%d]=%d.\n", i, j, u, u, perm[u]);
+        sod[u] += degrees[u];
+        if (i<1000 || i%25==0)
+          gk_i32pqUpdate(queue, u, sod[u]);
+      }
+    }
+
+    /*
+    for (j=0; j<ntodo; j++) {
+      if (pos[ot[j]] != j)
+        gk_errexit(SIGERR, "pos[ot[%zd]] != %zd.\n", j, j);
+    }
+    */
+
+  }
+
+
+  /* time to decide what to return */
+  if (r_iperm != NULL) {
+    /* reuse the 'degrees' array as storage for the iperm array (inverse of perm) */
+    for (i=0; i<nrows; i++)
+      degrees[perm[i]] = i;
+
+    *r_iperm = degrees;
+    degrees = NULL;
+  }
+
+  if (r_perm != NULL) {
+    *r_perm = perm;
+    perm = NULL;
+  }
+
+
+
+
+  /* cleanup memory */
+  gk_i32pqDestroy(queue);
+  gk_free((void **)&perm, &degrees, &wdegrees, &sod, &ot, &pos, &level, LTERM);
+
+}
+
diff --git a/error.c b/error.c
new file mode 100644
index 0000000..e2a18cf
--- /dev/null
+++ b/error.c
@@ -0,0 +1,214 @@
+/*!
+\file  error.c
+\brief Various error-handling functions
+
+This file contains functions dealing with error reporting and termination
+
+\author George
+\date 1/1/2007
+\version\verbatim $Id: error.c 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
+*/
+
+
+#define _GK_ERROR_C_  /* this is needed to properly declare the gk_jub* variables
+                         as an extern function in GKlib.h */
+
+#include <GKlib.h>
+
+
+/* These are the jmp_bufs used for the graceful exit in case of severe errors.
+   Multiple buffers are defined to allow for recursive invocation. */
+#define MAX_JBUFS 128
+__thread int gk_cur_jbufs=-1;  /* index of the current gk_jbufs entry; -1 when no trap is active */
+__thread jmp_buf gk_jbufs[MAX_JBUFS];  /* gk_sigthrow() jumps to gk_jbufs[gk_cur_jbufs] */
+__thread jmp_buf gk_jbuf;  /* gk_NonLocalExit_Handler() jumps here */
+
+typedef void (*gksighandler_t)(int);
+
+/* These are the holders of the old signal handlers for the trapped signals */
+static __thread gksighandler_t old_SIGMEM_handler;  /* saved by gk_SetSignalHandlers() */
+static __thread gksighandler_t old_SIGERR_handler;  /* saved by gk_SetSignalHandlers() */
+static __thread gksighandler_t old_SIGMEM_handlers[MAX_JBUFS];  /* one slot per gk_sigtrap() level */
+static __thread gksighandler_t old_SIGERR_handlers[MAX_JBUFS];  /* one slot per gk_sigtrap() level */
+
+/* The following is used to control if the gk_errexit() will actually abort or not.
+   It is not declared __thread, so there is always a single copy of this variable */
+static int gk_exit_on_error = 1;
+
+
+/*************************************************************************/
+/*! Sets gk_exit_on_error: when it is non-zero errexit() calls exit() and
+    gk_errexit() raises its signal; when zero both only print the message. */
+/*************************************************************************/
+void gk_set_exit_on_error(int value)
+{
+  gk_exit_on_error = value;
+}
+
+
+
+/*************************************************************************/
+/*! This function prints an error message and exits  
+ */
+/*************************************************************************/
+void errexit(char *f_str,...)
+{
+  va_list args;
+  size_t fmtlen = strlen(f_str);
+
+  /* emit the caller's formatted message */
+  va_start(args, f_str);
+  vfprintf(stderr, f_str, args);
+  va_end(args);
+
+  /* terminate the line unless the format string already ends with one */
+  if (fmtlen == 0 || f_str[fmtlen-1] != '\n')
+    fputc('\n', stderr);
+  fflush(stderr);
+
+  if (gk_exit_on_error) exit(-2);  /* otherwise return to the caller */
+}
+
+
+/*************************************************************************/
+/*! This function prints an error message and raises a signum signal
+ */
+/*************************************************************************/
+void gk_errexit(int signum, char *f_str,...)
+{
+  va_list args;
+
+  /* print the formatted message, always followed by a newline */
+  va_start(args, f_str);
+  vfprintf(stderr, f_str, args);
+  va_end(args);
+  fputc('\n', stderr);
+  fflush(stderr);
+
+  /* raise signum only when aborting on errors is enabled; else return */
+  if (gk_exit_on_error) raise(signum);
+}
+
+
+/***************************************************************************/
+/*! Opens a new trap level: advances gk_cur_jbufs, installs gk_sigthrow for
+    SIGMEM and SIGERR and saves the previous handlers for gk_siguntrap().
+    \returns 1 on success, 0 when all MAX_JBUFS levels are already in use. */
+/***************************************************************************/
+int gk_sigtrap() 
+{
+  if (gk_cur_jbufs+1 >= MAX_JBUFS)
+    return 0;
+
+  gk_cur_jbufs++;  /* NOTE(review): no setjmp() here; presumably the caller fills gk_jbufs[gk_cur_jbufs] - confirm */
+
+  old_SIGMEM_handlers[gk_cur_jbufs]  = signal(SIGMEM,  gk_sigthrow);
+  old_SIGERR_handlers[gk_cur_jbufs]  = signal(SIGERR,  gk_sigthrow);
+
+  return 1;
+}
+  
+
+/***************************************************************************/
+/*! Closes the current trap level: restores the SIGMEM/SIGERR handlers that
+    were saved at that level and decrements gk_cur_jbufs. \returns 1, or 0 when no level is active. */
+/***************************************************************************/
+int gk_siguntrap() 
+{
+  if (gk_cur_jbufs == -1)
+    return 0;
+
+  signal(SIGMEM,  old_SIGMEM_handlers[gk_cur_jbufs]);
+  signal(SIGERR,  old_SIGERR_handlers[gk_cur_jbufs]);
+
+  gk_cur_jbufs--;
+
+  return 1;
+}
+  
+
+/*************************************************************************/
+/*! This function is the custom signal handler; all it does is perform a
+    longjmp to gk_jbufs[gk_cur_jbufs], passing signum as the jump value
+ */
+/*************************************************************************/
+void gk_sigthrow(int signum)
+{
+  longjmp(gk_jbufs[gk_cur_jbufs], signum);
+}
+  
+
+/***************************************************************************
+* This function installs gk_NonLocalExit_Handler for SIGMEM and SIGERR and
+* saves the previously installed handlers in old_SIGMEM/SIGERR_handler
+****************************************************************************/
+void gk_SetSignalHandlers() 
+{
+  old_SIGMEM_handler = signal(SIGMEM,  gk_NonLocalExit_Handler);
+  old_SIGERR_handler = signal(SIGERR,  gk_NonLocalExit_Handler);
+}
+  
+
+/***************************************************************************
+* This function re-installs the handlers saved by gk_SetSignalHandlers()
+****************************************************************************/
+void gk_UnsetSignalHandlers() 
+{
+  signal(SIGMEM,  old_SIGMEM_handler);
+  signal(SIGERR,  old_SIGERR_handler);
+}
+  
+
+/*************************************************************************
+* This function is the signal handler installed by gk_SetSignalHandlers().
+* It performs the non-local exit by calling longjmp(gk_jbuf, signum).
+**************************************************************************/
+void gk_NonLocalExit_Handler(int signum)
+{
+  longjmp(gk_jbuf, signum);
+}
+  
+
+/*************************************************************************/
+/*! \brief strerror() wrapper; outside WIN32/MinGW/SUNOS builds it uses strerror_r() with a per-thread buffer */
+/**************************************************************************/
+char *gk_strerror(int errnum)
+{
+#if defined(WIN32) || defined(__MINGW32__)
+  return strerror(errnum);
+#else 
+#ifndef SUNOS
+  static __thread char buf[1024];  /* one buffer per thread, overwritten by every call */
+
+  strerror_r(errnum, buf, 1024);  /* NOTE(review): result ignored; the GNU variant may return its own string without filling buf - confirm which variant is built */
+
+  buf[1023] = '\0';  /* guarantee termination of the returned string */
+  return buf;
+#else
+  return strerror(errnum);
+#endif
+#endif
+}
+
+
+
+/*************************************************************************
+* Prints up to 10 call-stack frames to stdout (no-op without HAVE_EXECINFO_H)
+**************************************************************************/
+void PrintBackTrace()
+{
+#ifdef HAVE_EXECINFO_H
+  void *array[10];
+  int i, size;
+  char **strings;
+
+  size = backtrace(array, 10);
+  strings = backtrace_symbols(array, size);  /* malloc'ed; NULL on failure */
+
+  printf("Obtained %d stack frames.\n", size);
+  for (i=0; strings != NULL && i<size; i++) {  /* skip the names if the lookup failed */
+    printf("%s\n", strings[i]);
+  }
+  free(strings);  /* free(NULL) is a no-op */
+#endif
+}
diff --git a/evaluate.c b/evaluate.c
new file mode 100644
index 0000000..ce805ce
--- /dev/null
+++ b/evaluate.c
@@ -0,0 +1,132 @@
+/*!
+  \file  evaluate.c
+  \brief Various routines to evaluate classification performance
+
+  \author George
+  \date 9/23/2008
+  \version\verbatim $Id: evaluate.c 13328 2012-12-31 14:57:40Z karypis $ \endverbatim
+*/
+
+#include <GKlib.h>
+
+/**********************************************************************
+ * This function computes the max accuracy score of a ranked list,
+ * given +1/-1 class list
+ **********************************************************************/
+float ComputeAccuracy(int n, gk_fkv_t *list)
+{
+  int k, npos = 0, nneg, hits = 0, misses = 0;
+  float best = 0.0, cur;
+
+  /* count the positive (val == 1) instances; everything else is negative */
+  for (k=0; k<n; k++) {
+    if (list[k].val == 1)
+      npos++;
+  }
+  nneg = n - npos;
+
+  /* Treat each prefix [0..k] of the ranked list as the predicted positives
+     and keep the best accuracy (in percent) seen over all the cut-offs. */
+  for (k=0; k<n; k++) {
+    if (list[k].val == 1)
+      hits++;    /* positive ranked inside the prefix */
+    else
+      misses++;  /* negative ranked inside the prefix */
+
+    cur = (hits + nneg - misses) * 100.0/ (npos + nneg);
+    best = (cur > best ? cur : best);
+  }
+  return best;
+}
+
+
+/*****************************************************************************
+ * Computes the ROC-n score of a ranked list whose val is 1 for positives: the
+ * area under the ROC curve up to the first maxN false positives, divided by
+ * P*FP. Entries with equal keys form one step. Returns 0 if TP or FP is 0.
+ ******************************************************************************/
+float ComputeROCn(int n, int maxN, gk_fkv_t *list)
+{
+  int i, P, TP, FP, TPprev, FPprev;
+  double AUC = 0.0;  /* kept in floating point so that half-units survive */
+  float prev;
+
+  if (n <= 0) return 0.0;  /* empty list: list[0] must not be read */
+
+  FP = TP = FPprev = TPprev = 0;
+  prev = list[0].key -1;
+
+  for (P=0, i=0; i<n; i++)
+    P += (list[i].val == 1 ? 1 : 0);
+
+  for (i=0; i<n && FP < maxN; i++) {
+    if (list[i].key != prev) {  /* new key: close the trapezoid of the previous group */
+      AUC += 0.5*(TP+TPprev)*(FP-FPprev);  /* an integer /2 would truncate on ties */
+      prev = list[i].key;
+      FPprev = FP;
+      TPprev = TP;
+    }
+    if (list[i].val == 1) TP++; else FP++;
+  }
+  AUC += 0.5*(TP+TPprev)*(FP-FPprev);  /* trapezoid of the last group */
+
+  return (TP > 0 && FP > 0 ? (float)(AUC/((double)P*FP)) : 0.0);
+}
+
+
+/*****************************************************************************
+* This function computes the median rate of false positive for each positive
+* instance.
+******************************************************************************/
+float ComputeMedianRFP(int n, gk_fkv_t *list)
+{
+  int i, P, N, TP, FP;
+
+  /* count the positive (val == 1) and the negative instances */
+  P = N = 0;
+  for (i=0; i<n; i++) {
+    if (list[i].val == 1)
+      P++;
+    else
+      N++;
+  }
+  if (N == 0)
+    return 0.0;  /* no negatives: report a rate of 0 instead of dividing 0 by 0 */
+
+  /* scan the ranking until the median, i.e., the (P+1)/2-th, positive */
+  FP = TP = 0;
+  for (i=0; i<n && TP < (P+1)/2; i++) {
+    if (list[i].val == 1) TP++; else FP++;
+  }
+  return 1.0*FP/N;  /* fraction of the negatives ranked before that positive */
+}
+
+/*********************************************************
+ * Compute the mean
+ ********************************************************/
+float ComputeMean (int n, float *values)
+{
+  int k = 0;
+  float total = 0.0;
+
+  /* accumulate in single precision; the division is carried out in double */
+  while (k < n)
+    total += values[k++];
+  return 1.0 * total/ n;
+}
+
+/********************************************************
+ * Compute the standard deviation
+ ********************************************************/
+float ComputeStdDev(int  n, float *values)
+{
+  int k;
+  float avg = ComputeMean(n, values);
+  float sumsq = 0;
+
+  /* accumulate the squared deviations from the mean */
+  for (k=0; k<n; k++)
+    sumsq += (values[k] - avg)* (values[k] - avg);
+
+  return sqrt(1.0 * sumsq/n);  /* divides by n, not n-1 */
+}
diff --git a/fkvkselect.c b/fkvkselect.c
new file mode 100644
index 0000000..b1238ce
--- /dev/null
+++ b/fkvkselect.c
@@ -0,0 +1,142 @@
+/*!
+\file  fkvkselect.c
+\brief Sorts only the largest k values
+ 
+\date   Started 7/14/00
+\author George
+\version\verbatim $Id: fkvkselect.c 10711 2011-08-31 22:23:04Z karypis $\endverbatim
+*/
+
+
+#include <GKlib.h>
+
+/* Swap two items by plain assignment, using stmp as the temporary. */
+#define QSSWAP(a, b, stmp) do { stmp = (a); (a) = (b); (b) = stmp; } while (0)
+
+
+/******************************************************************************/
+/*! This function reorders cand[0..n-1] in place so that the 'topk' largest keys are in the beginning of the array, in no particular order. \returns n if n <= topk (array untouched), otherwise topk */
+/*******************************************************************************/
+int gk_dfkvkselect(size_t n, int topk, gk_fkv_t *cand)
+{
+  int i, j, lo, hi, mid;
+  gk_fkv_t stmp;
+  float pivot;
+
+  if (n <= topk)
+    return n; /* return if the array has fewer elements than we want */
+
+  for (lo=0, hi=n-1; lo < hi;) {
+    mid = lo + ((hi-lo) >> 1);
+
+    /* pick the pivot index among lo, mid and hi; NOTE(review): this is not always the median of the three */
+    if (cand[lo].key < cand[mid].key)
+      mid = lo;
+    if (cand[hi].key > cand[mid].key)
+      mid = hi;
+    else 
+      goto jump_over;
+    if (cand[lo].key < cand[mid].key)
+      mid = lo;
+
+jump_over:
+    QSSWAP(cand[mid], cand[hi], stmp);  /* park the pivot at the end of the range */
+    pivot = cand[hi].key;
+
+    /* the partitioning algorithm: keys >= pivot are moved to the front of [lo, hi) */
+    for (i=lo-1, j=lo; j<hi; j++) {
+      if (cand[j].key >= pivot) {
+        i++;
+        QSSWAP(cand[i], cand[j], stmp);
+      }
+    }
+    i++;
+    QSSWAP(cand[i], cand[hi], stmp);  /* the pivot ends up at position i */
+
+
+    if (i > topk) 
+      hi = i-1;  /* continue within the part left of the pivot */
+    else if (i < topk)
+      lo = i+1;  /* continue within the part right of the pivot */
+    else
+      break;  /* exactly topk entries precede the pivot */
+  }
+
+/*
+  if (cand[lo].key < cand[hi].key)
+    printf("Hmm Error: %d %d %d %f %f\n", i, lo, hi, cand[lo].key, cand[hi].key);
+
+
+  for (i=topk; i<n; i++) {
+    for (j=0; j<topk; j++)
+      if (cand[i].key > cand[j].key)
+        printf("Hmm Error: %d %d %f %f %d %d\n", i, j, cand[i].key, cand[j].key, lo, hi);
+  }
+*/
+
+  return topk;
+}
+
+
+/******************************************************************************/
+/*! This function reorders cand[0..n-1] in place so that the 'topk' smallest keys are in the beginning of the array, in no particular order. \returns n if n <= topk (array untouched), otherwise topk */
+/*******************************************************************************/
+int gk_ifkvkselect(size_t n, int topk, gk_fkv_t *cand)
+{
+  int i, j, lo, hi, mid;
+  gk_fkv_t stmp;
+  float pivot;
+
+  if (n <= topk)
+    return n; /* return if the array has fewer elements than we want */
+
+  for (lo=0, hi=n-1; lo < hi;) {
+    mid = lo + ((hi-lo) >> 1);
+
+    /* pick the pivot index among lo, mid and hi; NOTE(review): this is not always the median of the three */
+    if (cand[lo].key > cand[mid].key)
+      mid = lo;
+    if (cand[hi].key < cand[mid].key)
+      mid = hi;
+    else 
+      goto jump_over;
+    if (cand[lo].key > cand[mid].key)
+      mid = lo;
+
+jump_over:
+    QSSWAP(cand[mid], cand[hi], stmp);  /* park the pivot at the end of the range */
+    pivot = cand[hi].key;
+
+    /* the partitioning algorithm: keys <= pivot are moved to the front of [lo, hi) */
+    for (i=lo-1, j=lo; j<hi; j++) {
+      if (cand[j].key <= pivot) {
+        i++;
+        QSSWAP(cand[i], cand[j], stmp);
+      }
+    }
+    i++;
+    QSSWAP(cand[i], cand[hi], stmp);  /* the pivot ends up at position i */
+
+
+    if (i > topk) 
+      hi = i-1;  /* continue within the part left of the pivot */
+    else if (i < topk)
+      lo = i+1;  /* continue within the part right of the pivot */
+    else
+      break;  /* exactly topk entries precede the pivot */
+  }
+
+/*
+  if (cand[lo].key > cand[hi].key)
+    printf("Hmm Error: %d %d %d %f %f\n", i, lo, hi, cand[lo].key, cand[hi].key);
+
+
+  for (i=topk; i<n; i++) {
+    for (j=0; j<topk; j++)
+      if (cand[i].key < cand[j].key)
+        printf("Hmm Error: %d %d %f %f %d %d\n", i, j, cand[i].key, cand[j].key, lo, hi);
+  }
+*/
+
+  return topk;
+}
diff --git a/fs.c b/fs.c
new file mode 100644
index 0000000..21081dd
--- /dev/null
+++ b/fs.c
@@ -0,0 +1,225 @@
+/*!
+\file  fs.c
+\brief Various file-system functions.
+
+This file contains various functions that deal with interfacing with 
+the filesystem in a portable way.
+
+\date Started 4/10/95
+\author George
+\version\verbatim $Id: fs.c 14332 2013-05-18 12:22:57Z karypis $ \endverbatim
+*/
+
+
+#include <GKlib.h>
+
+
+
+/*************************************************************************
+* This function checks if a file exists
+**************************************************************************/
+int gk_fexists(char *fname)
+{
+  struct stat info;
+
+  /* a path that cannot be stat'ed is reported as not existing */
+  if (stat(fname, &info) != -1)
+    return S_ISREG(info.st_mode);
+  return 0;
+}
+
+
+/*************************************************************************
+* This function checks if a directory exists
+**************************************************************************/
+int gk_dexists(char *dirname)
+{
+  struct stat info;
+
+  /* a path that cannot be stat'ed is reported as not existing */
+  if (stat(dirname, &info) != -1)
+    return S_ISDIR(info.st_mode);
+  return 0;
+}
+
+
+/*************************************************************************/
+/*! \brief Returns the size of the file in bytes
+
+This function returns the size of a file as a 64 bit integer. If there 
+were any errors in stat'ing the file, -1 is returned.
+\note That due to the -1 return code, the maximum file size is limited to
+      63 bits (which I guess is okay for now).
+*/
+/**************************************************************************/
+ssize_t gk_getfsize(char *filename)
+{
+  struct stat status;
+
+  if (stat(filename, &status) == -1) 
+    return -1;  /* the file could not be stat'ed */
+
+  return (ssize_t)(status.st_size);  /* signed cast, matching the return type */
+}
+
+
+/*************************************************************************/
+/*! This function gets some basic statistics about the file. 
+    \param fname is the name of the file
+    \param r_nlines is the number of lines in the file. If it is NULL,
+           this information is not returned.
+    \param r_ntokens is the number of tokens in the file. If it is NULL,
+           this information is not returned.
+    \param r_max_nlntokens is the maximum number of tokens in any line
+           in the file. If it is NULL this information is not returned.
+    \param r_nbytes is the number of bytes in the file. If it is NULL,
+           this information is not returned.
+*/
+/*************************************************************************/
+void gk_getfilestats(char *fname, size_t *r_nlines, size_t *r_ntokens, 
+        size_t *r_max_nlntokens, size_t *r_nbytes)
+{
+  size_t nlines=0, ntokens=0, max_nlntokens=0, nbytes=0, oldntokens=0, nread, i;
+  int intoken=0;  /* 1 while the scan is inside a token */
+  char buffer[4096];
+  FILE *fpin;
+
+  fpin = gk_fopen(fname, "r", "gk_GetFileStats");
+
+  /* Loop on fread()'s result rather than on feof(): after a read error
+     feof() never becomes true and the loop would not terminate. */
+  while ((nread = fread(buffer, sizeof(char), sizeof(buffer), fpin)) > 0) {
+    nbytes += nread;
+
+    /* scan exactly nread bytes, so that an embedded NUL does not end the scan */
+    for (i=0; i<nread; i++) {
+      if (buffer[i] == '\n') {  /* end of line: it also closes a pending token */
+        nlines++;
+        ntokens += intoken;
+        intoken = 0;
+        if (max_nlntokens < ntokens-oldntokens)
+          max_nlntokens = ntokens-oldntokens;
+        oldntokens = ntokens;
+      }
+      else if (buffer[i] == ' ' || buffer[i] == '\t') {
+        ntokens += intoken;
+        intoken = 0;
+      }
+      else
+        intoken = 1;
+    }
+  }
+  ntokens += intoken;  /* account for a last line without a trailing newline */
+  if (max_nlntokens < ntokens-oldntokens)
+    max_nlntokens = ntokens-oldntokens;
+
+  gk_fclose(fpin);
+
+  if (r_nlines != NULL)
+    *r_nlines  = nlines;
+  if (r_ntokens != NULL)
+    *r_ntokens = ntokens;
+  if (r_max_nlntokens != NULL)
+    *r_max_nlntokens = max_nlntokens;
+  if (r_nbytes != NULL)
+    *r_nbytes  = nbytes;
+}
+
+
+/*************************************************************************
+* This function takes in a potentially full path specification of a file
+* and just returns a string containing just the basename of the file.
+* The basename is derived from the actual filename by stripping the last
+* .ext part.
+**************************************************************************/
+char *gk_getbasename(char *path)
+{
+  char *slash, *dot;
+  char *result;
+
+  /* the file name starts right after the last '/', if there is one */
+  slash = strrchr(path, '/');
+  result = gk_strdup(slash == NULL ? path : slash+1);
+
+  /* cut the copy at its last '.', which drops the extension */
+  dot = strrchr(result, '.');
+  if (dot != NULL)
+    *dot = '\0';
+
+  /* the returned string is the copy made by gk_strdup() */
+  return result;
+}
+
+/*************************************************************************
+* This function takes in a potentially full path specification of a file
+* and just returns a string corresponding to its file extension. The
+* extension of a file is considered to be the string right after the 
+* last '.' character.
+**************************************************************************/
+char *gk_getextname(char *path)
+{
+  char *dot = strrchr(path, '.');
+
+  /* without any '.', the whole path is duplicated */
+  if (dot == NULL)
+    return gk_strdup(path);
+  return gk_strdup(dot+1);
+}
+
+/*************************************************************************
+* This function takes in a potentially full path specification of a file
+* and just returns a string containing just the filename.
+**************************************************************************/
+char *gk_getfilename(char *path)
+{
+  char *slash = strrchr(path, '/');
+
+  /* the file name is whatever follows the last '/', or else the whole path */
+  if (slash != NULL)
+    path = slash+1;
+  return gk_strdup(path);
+}
+
+/*************************************************************************
+* Returns a copy of the directory part of path, i.e., everything before its
+* last '/'. When path contains no '/', a copy of "." is returned instead.
+* In both cases the result is a string created by gk_strdup().
+**************************************************************************/
+char *getpathname(char *path)
+{
+  char *slash, *dir;
+
+  slash = strrchr(path, '/');
+  if (slash == NULL)
+    return gk_strdup(".");
+
+  /* duplicate the full path and cut the copy at the same '/' position */
+  dir = gk_strdup(path);
+  dir[slash - path] = '\0';
+  return dir;
+}
+
+
+
+/*************************************************************************
+* Runs "mkdir -p pathname" via system(); returns -1 if the command is too long
+**************************************************************************/
+int gk_mkpath(char *pathname)
+{
+  char tmp[2048];  /* NOTE: pathname reaches the shell unquoted */
+  if (strlen(pathname) + sizeof("mkdir -p ") > sizeof(tmp)) return -1;
+  sprintf(tmp, "mkdir -p %s", pathname);  /* fits: length checked above */
+  return system(tmp);
+}
+
+
+/*************************************************************************
+* Runs "rm -r pathname" via system(); returns -1 if the command is too long
+**************************************************************************/
+int gk_rmpath(char *pathname)
+{
+  char tmp[2048];  /* NOTE: pathname reaches the shell unquoted */
+  if (strlen(pathname) + sizeof("rm -r ") > sizeof(tmp)) return -1;
+  sprintf(tmp, "rm -r %s", pathname);  /* fits: length checked above */
+  return system(tmp);
+}
diff --git a/getopt.c b/getopt.c
new file mode 100644
index 0000000..2e7e042
--- /dev/null
+++ b/getopt.c
@@ -0,0 +1,855 @@
+/*************************************************************************/
+/*! \file getopt.c
+\brief Command line parsing 
+
+This file contains an implementation of GNU's Getopt facility. The purpose
+for including it here is to ensure portability across different unix- and
+windows-based systems.
+
+\warning 
+The implementation provided here uses the \c gk_ prefix for all variables
+used by the standard Getopt facility to communicate with the program.
+So, do read the documentation here.
+
+\verbatim
+   Copyright (C) 1987,88,89,90,91,92,93,94,95,96,98,99,2000,2001
+   Free Software Foundation, Inc. This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  
+\endverbatim
+*/
+/*************************************************************************/
+
+
+#include <GKlib.h>
+
+/*************************************************************************/
+/* Local function prototypes */
+/*************************************************************************/
+static void exchange (char **);
+static char *gk_getopt_initialize (int, char **, char *);
+static int gk_getopt_internal(int argc, char **argv, char *optstring, 
+        struct gk_option *longopts, int *longind, int long_only);
+
+
+
+/*************************************************************************/
+/*! \brief For communication arguments to the caller.
+
+This variable is set by getopt to point at the value of the option argument, 
+for those options that accept arguments.
+*/
+/*************************************************************************/
+char *gk_optarg;
+
+
+/*************************************************************************/
+/*! \brief Index in ARGV of the next element to be scanned. 
+
+This variable is set by getopt to the index of the next element of the argv 
+array to be processed. Once getopt has found all of the option arguments, 
+you can use this variable to determine where the remaining non-option arguments 
+begin. 
+*/
+/*************************************************************************/
+int gk_optind = 1; 
+
+
+/*************************************************************************/
+/*! \brief Controls error reporting for unrecognized options.  
+
+If the value of this variable is nonzero, then getopt prints an error 
+message to the standard error stream if it encounters an unknown option 
+character or an option with a missing required argument. This is the default 
+behavior. If you set this variable to zero, getopt does not print any messages,
+but it still returns the character ? to indicate an error.
+*/
+/*************************************************************************/
+int gk_opterr = 1;
+
+
+/*************************************************************************/
+/*! \brief Stores unknown option characters
+
+When getopt encounters an unknown option character or an option with a 
+missing required argument, it stores that option character in this 
+variable. You can use this for providing your own diagnostic messages.
+*/
+/*************************************************************************/
+int gk_optopt = '?';
+
+
+/*************************************************************************/
+/*
+Records that the getopt facility has been initialized.
+*/
+/*************************************************************************/
+int gk_getopt_initialized;
+
+
+/*************************************************************************/
+/*
+The next char to be scanned in the option-element in which the last option 
+character we returned was found.  This allows us to pick up the scan where 
+we left off.
+
+If this is zero, or a null string, it means resume the scan by advancing 
+to the next ARGV-element.  
+*/
+/*************************************************************************/
+static char *nextchar;
+
+
+/*************************************************************************/
+/*
+Value of POSIXLY_CORRECT environment variable.  
+*/
+/*************************************************************************/
+static char *posixly_correct;
+
+
+/*************************************************************************/
+/*
+Describe how to deal with options that follow non-option ARGV-elements.
+
+If the caller did not specify anything, the default is REQUIRE_ORDER if 
+the environment variable POSIXLY_CORRECT is defined, PERMUTE otherwise.
+
+REQUIRE_ORDER means don't recognize them as options; stop option processing 
+when the first non-option is seen.  This is what Unix does.  This mode of 
+operation is selected by either setting the environment variable 
+POSIXLY_CORRECT, or using `+' as the first character of the list of 
+option characters.
+
+PERMUTE is the default.  We permute the contents of ARGV as we scan, so 
+that eventually all the non-options are at the end.  This allows options
+to be given in any order, even with programs that were not written to
+expect this.
+
+RETURN_IN_ORDER is an option available to programs that were written
+to expect options and other ARGV-elements in any order and that care 
+about the ordering of the two.  We describe each non-option ARGV-element
+as if it were the argument of an option with character code 1.
+Using `-' as the first character of the list of option characters
+selects this mode of operation.
+
+The special argument `--' forces an end of option-scanning regardless
+of the value of `ordering'.  In the case of RETURN_IN_ORDER, only
+`--' can cause `getopt' to return -1 with `gk_optind' != ARGC.  
+*/
+/*************************************************************************/
+static enum
+{
+  REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER
+} ordering;
+
+
+
+/*************************************************************************/
+/* 
+Describe the part of ARGV that contains non-options that have
+been skipped.  `first_nonopt' is the index in ARGV of the first of them;
+`last_nonopt' is the index after the last of them.  
+*/
+/*************************************************************************/
+static int first_nonopt;
+static int last_nonopt;
+
+
+
+
+
+/*************************************************************************/
+/*
+Handle permutation of arguments.  
+
+Exchange two adjacent subsequences of ARGV. 
+One subsequence is elements [first_nonopt,last_nonopt)
+which contains all the non-options that have been skipped so far.
+The other is elements [last_nonopt,gk_optind), which contains all
+the options processed since those non-options were skipped.
+
+`first_nonopt' and `last_nonopt' are relocated so that they describe
+the new indices of the non-options in ARGV after they are moved.  
+*/
+/*************************************************************************/
+static void exchange (char **argv)
+{
+  int lo = first_nonopt;  /* start of the skipped non-options */
+  int mid = last_nonopt;  /* end of the non-options, start of the options */
+  int hi = gk_optind;     /* end of the options processed so far */
+  int k, count, other;
+  int nonopts_shorter;    /* non-zero when [lo,mid) is the shorter segment */
+  char *held;
+
+  /* Each pass swaps the shorter of the two segments [lo,mid) and [mid,hi)
+     with the far end of the longer one.  The shorter segment thereby
+     reaches its final position and is dropped from the range; what is
+     left of the longer segment is a smaller instance of the same problem.
+     The loop stops as soon as either segment becomes empty.  */
+  while (hi > mid && mid > lo) {
+    nonopts_shorter = (hi - mid > mid - lo);
+
+    if (nonopts_shorter) {
+      /* trade the non-options with the tail of the option segment */
+      count = mid - lo;
+      other = hi - count;
+    }
+    else {
+      /* trade the options with the head of the non-option segment */
+      count = hi - mid;
+      other = mid;
+    }
+
+    /* element-wise swap of argv[lo..lo+count) and argv[other..other+count) */
+    for (k = 0; k < count; k++) {
+      held = argv[lo + k];
+      argv[lo + k] = argv[other + k];
+      argv[other + k] = held;
+    }
+
+    /* exclude the segment that has just been placed */
+    if (nonopts_shorter)
+      hi -= count;
+    else
+      lo += count;
+  }
+
+  /* Update the records for the slots the non-options now occupy: they
+     moved up by the number of options that were placed in front of them
+     and now end at gk_optind.  */
+  first_nonopt += (gk_optind - last_nonopt);
+  last_nonopt = gk_optind;
+}
+
+
+
+/*************************************************************************/
+/*
+Initialize the internal data when the first call is made.  
+*/
+/*************************************************************************/
+static char *gk_getopt_initialize (int argc, char **argv, char *optstring)
+{
+  /* Nothing has been skipped yet: the range of non-options is empty and
+     starts at gk_optind, the first ARGV-element that will be scanned.  */
+  last_nonopt = gk_optind;
+  first_nonopt = last_nonopt;
+  nextchar = NULL;
+  posixly_correct = getenv("POSIXLY_CORRECT");
+
+  /* A leading '-' or '+' in OPTSTRING selects the ordering explicitly and
+     is consumed; otherwise POSIXLY_CORRECT decides between REQUIRE_ORDER
+     and the default PERMUTE.  */
+  switch (optstring[0]) {
+    case '-':
+      ordering = RETURN_IN_ORDER;
+      optstring++;
+      break;
+
+    case '+':
+      ordering = REQUIRE_ORDER;
+      optstring++;
+      break;
+
+    default:
+      ordering = (posixly_correct != NULL ? REQUIRE_ORDER : PERMUTE);
+  }
+  return optstring;
+}
+
+
+/*************************************************************************/
+/*
+   Scan elements of ARGV (whose length is ARGC) for option characters
+   given in OPTSTRING.
+
+   If an element of ARGV starts with '-', and is not exactly "-" or "--",
+   then it is an option element.  The characters of this element
+   (aside from the initial '-') are option characters.  If `getopt'
+   is called repeatedly, it returns successively each of the option characters
+   from each of the option elements.
+
+   If `getopt' finds another option character, it returns that character,
+   updating `gk_optind' and `nextchar' so that the next call to `getopt' can
+   resume the scan with the following option character or ARGV-element.
+
+   If there are no more option characters, `getopt' returns -1.
+   Then `gk_optind' is the index in ARGV of the first ARGV-element
+   that is not an option.  (The ARGV-elements have been permuted
+   so that those that are not options now come last.)
+
+   OPTSTRING is a string containing the legitimate option characters.
+   If an option character is seen that is not listed in OPTSTRING,
+   return '?' after printing an error message.  If you set `gk_opterr' to
+   zero, the error message is suppressed but we still return '?'.
+
+   If a char in OPTSTRING is followed by a colon, that means it wants an arg,
+   so the following text in the same ARGV-element, or the text of the following
+   ARGV-element, is returned in `gk_optarg'.  Two colons mean an option that
+   wants an optional arg; if there is text in the current ARGV-element,
+   it is returned in `gk_optarg', otherwise `gk_optarg' is set to zero.
+
+   If OPTSTRING starts with `-' or `+', it requests different methods of
+   handling the non-option ARGV-elements.
+   See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above.
+
+   Long-named options begin with `--' instead of `-'.
+   Their names may be abbreviated as long as the abbreviation is unique
+   or is an exact match for some defined option.  If they have an
+   argument, it follows the option name in the same ARGV-element, separated
+   from the option name by a `=', or else in the next ARGV-element.
+   When `getopt' finds a long-named option, it returns 0 if that option's
+   `flag' field is nonzero, the value of the option's `val' field
+   if the `flag' field is zero.
+
+   LONGOPTS is a vector of `struct gk_option' terminated by an
+   element containing a name which is zero.
+
+   LONGIND returns the index in LONGOPT of the long-named option found.
+   It is only valid when a long-named option has been found by the most
+   recent call.
+
+   If LONG_ONLY is nonzero, '-' as well as '--' can introduce
+   long-named options.  
+*/
+/*************************************************************************/
+static int gk_getopt_internal(int argc, char **argv, char *optstring,
+        struct gk_option *longopts, int *longind, int long_only)
+{
+  int print_errors = gk_opterr;
+
+  if (argc < 1)
+    return -1;
+
+  gk_optarg = NULL;
+
+  if (gk_optind == 0 || !gk_getopt_initialized) {
+    if (gk_optind == 0)
+      gk_optind = 1;  /* Don't scan ARGV[0], the program name.  */
+    optstring = gk_getopt_initialize (argc, argv, optstring);
+    gk_getopt_initialized = 1;
+  }
+  else if (optstring[0] == '-' || optstring[0] == '+')
+    optstring++;  /* skip the ordering flag on later calls too; gk_getopt_initialize() is presumed to do so on the first */
+
+  /* A leading ':' (after any ordering flag) silences the diagnostics.  */
+  if (optstring[0] == ':')
+    print_errors = 0;
+
+  /* Test whether ARGV[gk_optind] is a non-option argument, i.e. it does not
+     start with '-' or it is exactly "-".  */
+# define NONOPTION_P (argv[gk_optind][0] != '-' || argv[gk_optind][1] == '\0')
+
+  if (nextchar == NULL || *nextchar == '\0') {  /* Advance to the next ARGV-element.  */
+
+    /* Give FIRST_NONOPT & LAST_NONOPT rational values if OPTIND has been
+       moved back by the user (who may also have changed the arguments).  */
+    if (last_nonopt > gk_optind)
+      last_nonopt = gk_optind;
+    if (first_nonopt > gk_optind)
+      first_nonopt = gk_optind;
+
+    if (ordering == PERMUTE) {
+      /* If we have just processed some options following some non-options,
+         exchange them so that the options come first.  */
+
+      if (first_nonopt != last_nonopt && last_nonopt != gk_optind)
+        exchange ((char **) argv);
+      else if (last_nonopt != gk_optind)
+        first_nonopt = gk_optind;
+
+      /* Skip any additional non-options
+         and extend the range of non-options previously skipped.  */
+
+      while (gk_optind < argc && NONOPTION_P)
+        gk_optind++;
+
+      last_nonopt = gk_optind;
+    }
+
+    /* The special ARGV-element `--' means premature end of options.
+       Skip it like a null option,
+       then exchange with previous non-options as if it were an option,
+       then skip everything else like a non-option.  */
+
+    if (gk_optind != argc && !strcmp (argv[gk_optind], "--")) {
+      gk_optind++;
+
+      if (first_nonopt != last_nonopt && last_nonopt != gk_optind)
+        exchange ((char **) argv);
+      else if (first_nonopt == last_nonopt)
+        first_nonopt = gk_optind;
+      last_nonopt = argc;
+
+      gk_optind = argc;
+    }
+
+    /* If we have done all the ARGV-elements, stop the scan
+       and back over any non-options that we skipped and permuted.  */
+
+    if (gk_optind == argc) {
+      /* Set the next-arg-index to point at the non-options
+         that we previously skipped, so the caller will digest them.  */
+      if (first_nonopt != last_nonopt)
+        gk_optind = first_nonopt;
+      return -1;
+    }
+
+    /* If we have come to a non-option and did not permute it,
+       either stop the scan or describe it to the caller and pass it by.  */
+
+    if (NONOPTION_P) {
+      if (ordering == REQUIRE_ORDER)
+        return -1;
+      gk_optarg = argv[gk_optind++];
+      return 1;
+    }
+
+    /* We have found another option-ARGV-element.
+       Skip the initial punctuation.  */
+
+    nextchar = (argv[gk_optind] + 1 + (longopts != NULL && argv[gk_optind][1] == '-'));
+  }
+
+  /* Decode the current option-ARGV-element.  */
+
+  /* Check whether the ARGV-element is a long option.
+
+     If long_only and the ARGV-element has the form "-f", where f is
+     a valid short option, don't consider it an abbreviated form of
+     a long option that starts with f.  Otherwise there would be no
+     way to give the -f short option.
+
+     On the other hand, if there's a long option "fubar" and
+     the ARGV-element is "-fu", do consider that an abbreviation of
+     the long option, just like "--fu", and not "-f" with arg "u".
+
+     This distinction seems to be the most useful approach.  */
+
+  if (longopts != NULL && (argv[gk_optind][1] == '-' || (long_only && (argv[gk_optind][2] || !strchr(optstring, argv[gk_optind][1]))))) {
+    char *nameend;
+    struct gk_option *p;
+    struct gk_option *pfound = NULL;
+    int exact = 0;
+    int ambig = 0;
+    int indfound = -1;
+    int option_index;
+
+    for (nameend = nextchar; *nameend && *nameend != '='; nameend++)
+      /* Do nothing.  */ ;
+
+    /* Test all long options for either exact match or abbreviated matches.  */
+    for (p = longopts, option_index = 0; p->name; p++, option_index++) {
+      if (!strncmp (p->name, nextchar, nameend - nextchar)) {
+        if ((unsigned int) (nameend - nextchar) == (unsigned int) strlen (p->name)) {
+          /* Exact match found.  */
+          pfound = p;
+          indfound = option_index;
+          exact = 1;
+          break;
+        }
+        else if (pfound == NULL) {
+          /* First nonexact match found.  */
+          pfound = p;
+          indfound = option_index;
+        }
+        else if (long_only || pfound->has_arg != p->has_arg || pfound->flag != p->flag || pfound->val != p->val)
+          /* Second or later nonexact match found.  */
+          ambig = 1;
+      }
+    }
+
+    if (ambig && !exact) {
+      if (print_errors)
+        fprintf(stderr, "%s: option `%s' is ambiguous\n", argv[0], argv[gk_optind]);
+
+      nextchar += strlen (nextchar);
+      gk_optind++;
+      gk_optopt = 0;
+      return '?';
+    }
+
+    if (pfound != NULL) {
+      option_index = indfound;
+      gk_optind++;
+      if (*nameend) {
+        /* Don't test has_arg with >, because some C compilers don't allow it to be used on enums.  */
+        if (pfound->has_arg)
+          gk_optarg = nameend + 1;
+        else {
+          if (print_errors) {
+            if (argv[gk_optind - 1][1] == '-')
+              /* --option */
+              fprintf(stderr, "%s: option `--%s' doesn't allow an argument\n", argv[0], pfound->name);
+            else
+              /* +option or -option */
+              fprintf(stderr, "%s: option `%c%s' doesn't allow an argument\n", argv[0], argv[gk_optind - 1][0], pfound->name);
+          }
+
+          nextchar += strlen (nextchar);
+
+          gk_optopt = pfound->val;
+          return '?';
+        }
+      }
+      else if (pfound->has_arg == 1) {
+        if (gk_optind < argc)
+          gk_optarg = argv[gk_optind++];
+        else {
+          if (print_errors)
+            fprintf(stderr, "%s: option `%s' requires an argument\n", argv[0], argv[gk_optind - 1]);
+          nextchar += strlen (nextchar);
+          gk_optopt = pfound->val;
+          return optstring[0] == ':' ? ':' : '?';
+        }
+      }
+      nextchar += strlen (nextchar);
+      if (longind != NULL)
+        *longind = option_index;
+      if (pfound->flag) {
+        *(pfound->flag) = pfound->val;
+        return 0;
+      }
+      return pfound->val;
+    }
+
+    /* Can't find it as a long option.  If this is not getopt_long_only,
+       or the option starts with '--' or is not a valid short
+        option, then it's an error. Otherwise interpret it as a short option.  */
+    if (!long_only || argv[gk_optind][1] == '-' || strchr(optstring, *nextchar) == NULL) {
+      if (print_errors) {
+        if (argv[gk_optind][1] == '-')
+          /* --option */
+          fprintf(stderr, "%s: unrecognized option `--%s'\n", argv[0], nextchar);
+        else
+          /* +option or -option */
+          fprintf(stderr, "%s: unrecognized option `%c%s'\n", argv[0], argv[gk_optind][0], nextchar);
+      }
+      nextchar = (char *) "";
+      gk_optind++;
+      gk_optopt = 0;
+      return '?';
+    }
+  }
+
+  /* Look at and handle the next short option-character.  */
+  {
+    char c = *nextchar++;
+    char *temp = strchr(optstring, c);
+
+    /* Increment `gk_optind' when we start to process its last character.  */
+    if (*nextchar == '\0')
+      ++gk_optind;
+
+    if (temp == NULL || c == ':') {
+      if (print_errors) {
+        if (posixly_correct)
+          /* 1003.2 specifies the format of this message.  */
+          fprintf(stderr, "%s: illegal option -- %c\n", argv[0], c);
+        else
+          fprintf(stderr, "%s: invalid option -- %c\n", argv[0], c);
+      }
+      gk_optopt = c;
+      return '?';
+    }
+
+    /* Convenience. Treat POSIX -W foo same as long option --foo (only when a long-option table was supplied).  */
+    if (temp[0] == 'W' && temp[1] == ';' && longopts != NULL) {
+      char *nameend;
+      struct gk_option *p;
+      struct gk_option *pfound = NULL;
+      int exact = 0;
+      int ambig = 0;
+      int indfound = 0;
+      int option_index;
+
+      /* This is an option that requires an argument.  */
+      if (*nextchar != '\0') {
+        gk_optarg = nextchar;
+        /* If we end this ARGV-element by taking the rest as an arg,
+           we must advance to the next element now.  */
+        gk_optind++;
+      }
+      else if (gk_optind == argc) {
+        if (print_errors) {
+          /* 1003.2 specifies the format of this message.  */
+          fprintf(stderr, "%s: option requires an argument -- %c\n", argv[0], c);
+        }
+        gk_optopt = c;
+        if (optstring[0] == ':')
+          c = ':';
+        else
+          c = '?';
+        return c;
+      }
+      else
+        /* We already incremented `gk_optind' once; increment it again when taking next ARGV-elt as argument.  */
+        gk_optarg = argv[gk_optind++];
+
+      /* gk_optarg is now the argument, see if it's in the table of longopts.  */
+
+      for (nextchar = nameend = gk_optarg; *nameend && *nameend != '='; nameend++)
+        /* Do nothing.  */ ;
+
+      /* Test all long options for either exact match or abbreviated matches.  */
+      for (p = longopts, option_index = 0; p->name; p++, option_index++) {
+        if (!strncmp (p->name, nextchar, nameend - nextchar)) {
+          if ((unsigned int) (nameend - nextchar) == strlen (p->name)) {
+            /* Exact match found.  */
+            pfound = p;
+            indfound = option_index;
+            exact = 1;
+            break;
+          }
+          else if (pfound == NULL) {
+            /* First nonexact match found.  */
+            pfound = p;
+            indfound = option_index;
+          }
+          else
+            /* Second or later nonexact match found.  */
+            ambig = 1;
+        }
+      }
+      if (ambig && !exact) {
+        /* gk_optind already points past the -W argument: report that argument and do not skip another element.  */
+        if (print_errors)
+          fprintf(stderr, "%s: option `-W %s' is ambiguous\n", argv[0], nextchar);
+        nextchar += strlen (nextchar);
+        return '?';
+      }
+      if (pfound != NULL) {
+        option_index = indfound;
+        if (*nameend) {
+          /* Don't test has_arg with >, because some C compilers don't allow it to be used on enums.  */
+          if (pfound->has_arg)
+            gk_optarg = nameend + 1;
+          else {
+            if (print_errors)
+              fprintf(stderr, "%s: option `-W %s' doesn't allow an argument\n", argv[0], pfound->name);
+
+            nextchar += strlen (nextchar);
+            return '?';
+          }
+        }
+        else if (pfound->has_arg == 1) {
+          if (gk_optind < argc)
+            gk_optarg = argv[gk_optind++];
+          else {
+            if (print_errors)
+              fprintf(stderr, "%s: option `%s' requires an argument\n", argv[0], argv[gk_optind - 1]);
+            nextchar += strlen (nextchar);
+            return optstring[0] == ':' ? ':' : '?';
+          }
+        }
+        nextchar += strlen (nextchar);
+        if (longind != NULL)
+          *longind = option_index;
+        if (pfound->flag) {
+          *(pfound->flag) = pfound->val;
+          return 0;
+        }
+        return pfound->val;
+      }
+      nextchar = NULL;
+      return 'W';  /* Let the application handle it.   */
+    }
+
+    if (temp[1] == ':') {
+      if (temp[2] == ':') {
+        /* This is an option that accepts an argument optionally.  */
+        if (*nextchar != '\0') {
+          gk_optarg = nextchar;
+          gk_optind++;
+        }
+        else
+          gk_optarg = NULL;
+        nextchar = NULL;
+      }
+      else {
+        /* This is an option that requires an argument.  */
+        if (*nextchar != '\0') {
+          gk_optarg = nextchar;
+          /* If we end this ARGV-element by taking the rest as an arg, we must advance to the next element now.  */
+          gk_optind++;
+        }
+        else if (gk_optind == argc) {
+          if (print_errors) {
+            /* 1003.2 specifies the format of this message.  */
+            fprintf(stderr, "%s: option requires an argument -- %c\n", argv[0], c);
+          }
+          gk_optopt = c;
+          if (optstring[0] == ':')
+            c = ':';
+          else
+            c = '?';
+        }
+        else
+          /* We already incremented `gk_optind' once; increment it again when taking next ARGV-elt as argument.  */
+          gk_optarg = argv[gk_optind++];
+        nextchar = NULL;
+      }
+    }
+    return c;
+  }
+}
+
+
+
+/*************************************************************************/
+/*! \brief Parse short (single-character) command-line options
+
+gk_getopt() returns the next option found in the argument list described
+by \c argc and \c argv, which normally are the values received by main().
+It is a thin front-end to gk_getopt_internal() that supplies no table of
+long options, so only the characters listed in \c options are recognized.
+
+\param argc is the number of entries in \c argv.
+\param argv is the array of command-line strings; with the default
+       ordering its entries are permuted while it is being scanned.
+\param options is the string of valid option characters. A character
+       followed by one colon (`:') takes a required argument; a character
+       followed by two colons (`::') takes an optional argument, which
+       is a GNU extension.
+
+\return
+The character of the next option, or -1 once there are no more options.
+At that point #gk_optind is the index in \c argv of the first element
+that is not an option, so compare it against \c argc to find out whether
+any non-option arguments remain.
+
+\return
+For an option that takes an argument, #gk_optarg points at the argument
+text. The pointer refers to the \c argv storage itself rather than to
+a private buffer, so ordinarily there is no need to copy the string.
+When an optional argument was not supplied #gk_optarg is a null pointer.
+
+\return
+For an option character that is not listed in \c options, `?' is
+returned and #gk_optopt holds the offending character. A required
+argument that is missing is reported the same way, except that `:' is
+returned instead of `?' when \c options starts with a colon (`:').
+A diagnostic is printed on stderr unless #gk_opterr is zero or
+\c options starts with a colon.
+
+Options may be mixed with non-option arguments; the special argument
+<tt>`--'</tt> always ends option scanning. One of three orderings is used:
+  - By default \c argv is permuted during the scan so that eventually
+    all the non-options are at the end. Options can then be given in
+    any order, even to programs that were not written to expect this.
+  - If \c options begins with a hyphen (`-'), each non-option argument
+    is returned, in place, as if it were the argument of an option
+    whose character code is 1.
+  - If \c options begins with a plus sign (`+'), or the environment
+    variable POSIXLY_CORRECT is set, scanning stops at the first
+    non-option argument, as POSIX demands.
+
+\note The scanning position lives in variables defined outside these
+      functions and shared by gk_getopt(), gk_getopt_long() and
+      gk_getopt_long_only(), so only one argument list can be scanned
+      at a time.
+*/
+/*************************************************************************/
+int gk_getopt(int argc, char **argv, char *options)
+{
+  struct gk_option *no_longopts = NULL;  /* short options only */
+  int *no_longind = NULL;                /* hence no index to report */
+
+  return gk_getopt_internal(argc, argv, options, no_longopts, no_longind, 0);
+}
+
+
+/*************************************************************************/
+/*! \brief Parse command-line arguments that may include long options
+
+Besides the single-character options handled by gk_getopt(), this
+function accepts GNU-style long options introduced by `--'. A long
+option name may be abbreviated as long as the abbreviation is unique
+or is an exact match for one of the defined options.
+
+\param argc is the number of entries in \c argv.
+\param argv is the array of command-line strings.
+\param options lists the short options to accept, exactly as for
+       gk_getopt().
+\param long_options is the table of long options to accept, terminated
+       by an entry whose name is a null pointer. See the definition of
+       ::gk_option for the meaning of the fields.
+\param opt_index is an output argument and may be a null pointer. When
+       it is not, and a long option has been recognized, the index of
+       that option in \c long_options is stored in <tt>*opt_index</tt>,
+       so its name is <tt>long_options[*opt_index].name</tt>. Long
+       options can therefore be told apart either by their val fields
+       or by their indices, which also works for options that only
+       set a flag.
+
+\return
+For a short option the behavior is that of gk_getopt(): the character
+code of the option is returned and its argument, if it has one, is
+stored in #gk_optarg.
+
+\return
+For a long option the result depends on the flag and val fields of
+the matching entry of \c long_options.
+
+\return
+If flag is a null pointer, the contents of val are returned to
+identify the option. Options with different meanings should be given
+distinct val values so that they can be decoded by the caller. A long
+option that is equivalent to a short one can use the character code
+of the short option as its val.
+
+\return
+If flag is not a null pointer, the option merely sets a flag in the
+program: val is stored in the int that flag points to, and 0 is
+returned.
+
+\return
+An argument of a long option is either attached to the option with
+`=' or, if the argument is required, taken from the next \c argv
+element; in both cases #gk_optarg points at it. When the option was
+given no argument #gk_optarg is a null pointer, which is how a missing
+optional argument can be detected.
+
+\return
+When no options are left, -1 is returned and #gk_optind is the index
+in \c argv of the next remaining argument.
+*/
+/*************************************************************************/
+int gk_getopt_long(int argc, char **argv, char *options,
+       struct gk_option *long_options, int *opt_index)
+{
+  const int long_only = 0;  /* only `--' introduces a long option */
+
+  return gk_getopt_internal(argc, argv, options, long_options, opt_index, long_only);
+}
+
+
+
+/*************************************************************************/
+/*! \brief Like gk_getopt_long(), but `-' can also introduce a long option
+
+An argument starting with `-' (not `--') that matches no long option but
+does match a short option is parsed as a short option instead.  */
+/*************************************************************************/
+int gk_getopt_long_only(int argc, char **argv, char *options,
+       struct gk_option *long_options, int *opt_index)
+{
+  const int long_only = 1;  /* `-' as well as `--' starts a long option */
+
+  return gk_getopt_internal(argc, argv, options, long_options, opt_index, long_only);
+}
+
diff --git a/gk_arch.h b/gk_arch.h
new file mode 100644
index 0000000..b82fb6a
--- /dev/null
+++ b/gk_arch.h
@@ -0,0 +1,70 @@
+/*!
+\file gk_arch.h
+\brief This file contains various architecture-specific declarations
+
+\date   Started 3/27/2007
+\author George
+\version\verbatim $Id: gk_arch.h 21637 2018-01-03 22:37:24Z karypis $ \endverbatim
+*/
+
+#ifndef _GK_ARCH_H_
+#define _GK_ARCH_H_
+
+/*************************************************************************
+* Architecture-specific differences in header files
+**************************************************************************/
+#ifdef LINUX
+#if !defined(__USE_XOPEN)
+#define __USE_XOPEN
+#endif
+#if !defined(_XOPEN_SOURCE)
+#define _XOPEN_SOURCE 600
+#endif
+#if !defined(__USE_XOPEN2K)
+#define __USE_XOPEN2K
+#endif
+#endif
+
+
+#ifdef HAVE_EXECINFO_H
+#include <execinfo.h>
+#endif
+
+
+#ifdef __MSC__ 
+  #include "gk_ms_stdint.h"
+  #include "gk_ms_inttypes.h"
+  #include "gk_ms_stat.h"
+  #include "win32/adapt.h"
+#else
+#ifndef SUNOS
+  #include <stdint.h>
+#endif
+  #include <inttypes.h>
+  #include <sys/types.h>
+#ifndef __MINGW32__
+  #include <sys/resource.h>
+#endif
+  #include <sys/time.h>
+  #include <unistd.h>
+#endif
+
+
+/*************************************************************************
+* Architecture-specific modifications
+**************************************************************************/
+#ifdef WIN32
+typedef ptrdiff_t ssize_t;
+#endif
+
+
+#ifdef SUNOS
+#define PTRDIFF_MAX  INT64_MAX
+#endif
+
+#include <math.h>   /* declares INFINITY where the C library has it (some MSC versions do not) */
+#include <float.h>  /* declares FLT_MAX, which the fallback below expands to */
+#ifndef INFINITY
+#define INFINITY FLT_MAX
+#endif
+#endif
diff --git a/gk_defs.h b/gk_defs.h
new file mode 100644
index 0000000..68cb9a4
--- /dev/null
+++ b/gk_defs.h
@@ -0,0 +1,87 @@
+/*!
+\file gk_defs.h
+\brief This file contains various constants definitions
+
+\date   Started 3/27/2007
+\author George
+\version\verbatim $Id: gk_defs.h 22039 2018-05-26 16:34:48Z karypis $ \endverbatim
+*/
+
+#ifndef _GK_DEFS_H_
+#define _GK_DEFS_H_
+
+
+#define LTERM                   (void **) 0     /* List terminator for GKfree() */
+
+/* mopt_t types */
+#define GK_MOPT_MARK            1
+#define GK_MOPT_CORE            2
+#define GK_MOPT_HEAP            3
+
+#define HTABLE_EMPTY            -1
+#define HTABLE_DELETED          -2
+#define HTABLE_FIRST             1
+#define HTABLE_NEXT              2
+
+/* pdb corruption bit switches */
+#define CRP_ALTLOCS    1
+#define CRP_MISSINGCA  2
+#define CRP_MISSINGBB  4
+#define CRP_MULTICHAIN 8
+#define CRP_MULTICA    16
+#define CRP_MULTIBB    32
+
+#define MAXLINELEN 300000
+
+/* GKlib signals to standard signal mapping */
+#define SIGMEM  SIGABRT
+#define SIGERR  SIGTERM
+
+
+/* CSR-related defines */
+#define GK_CSR_ROW      1
+#define GK_CSR_COL      2
+#define GK_CSR_ROWCOL   3
+
+#define GK_CSR_MAXTF    1
+#define GK_CSR_SQRT     2
+#define GK_CSR_POW25    3
+#define GK_CSR_POW65    4
+#define GK_CSR_POW75    5
+#define GK_CSR_POW85    6
+#define GK_CSR_LOG      7
+#define GK_CSR_IDF      8
+#define GK_CSR_IDF2     9
+#define GK_CSR_MAXTF2   10
+
+#define GK_CSR_DOTP     1
+#define GK_CSR_COS      2
+#define GK_CSR_JAC      3
+#define GK_CSR_MIN      4
+#define GK_CSR_AMIN     5
+
+#define GK_CSR_FMT_AUTO         2  /* NOTE(review): same value as GK_CSR_FMT_CSR below -- confirm this is intended */
+#define GK_CSR_FMT_CLUTO        1
+#define GK_CSR_FMT_CSR          2
+#define GK_CSR_FMT_METIS        3
+#define GK_CSR_FMT_BINROW       4
+#define GK_CSR_FMT_BINCOL       5
+#define GK_CSR_FMT_IJV          6
+#define GK_CSR_FMT_BIJV         7
+
+#define GK_CSR_SYM_SUM          1
+#define GK_CSR_SYM_MIN          2
+#define GK_CSR_SYM_MAX          3
+#define GK_CSR_SYM_AVG          4
+
+
+#define GK_GRAPH_FMT_METIS      1
+#define GK_GRAPH_FMT_IJV        2
+#define GK_GRAPH_FMT_HIJV       3
+
+#define GK_GRAPH_SYM_SUM        1
+#define GK_GRAPH_SYM_MIN        2
+#define GK_GRAPH_SYM_MAX        3
+#define GK_GRAPH_SYM_AVG        4
+
+#endif
diff --git a/gk_externs.h b/gk_externs.h
new file mode 100644
index 0000000..2c0fdd9
--- /dev/null
+++ b/gk_externs.h
@@ -0,0 +1,25 @@
+/*!
+\file gk_externs.h
+\brief This file contains definitions of external variables created by GKlib
+
+\date   Started 3/27/2007
+\author George
+\version\verbatim $Id: gk_externs.h 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
+*/
+
+#ifndef _GK_EXTERNS_H_
+#define _GK_EXTERNS_H_
+
+
+/*************************************************************************
+* Extern variable declarations. Each one is declared __thread, i.e. with a separate instance per thread. Skipped when _GK_ERROR_C_ is defined (presumably by error.c, which defines them -- TODO confirm).
+**************************************************************************/
+#ifndef _GK_ERROR_C_
+/* declared in error.c */
+extern __thread int gk_cur_jbufs;
+extern __thread jmp_buf gk_jbufs[];
+extern __thread jmp_buf gk_jbuf;
+
+#endif
+
+#endif
diff --git a/gk_getopt.h b/gk_getopt.h
new file mode 100644
index 0000000..597c080
--- /dev/null
+++ b/gk_getopt.h
@@ -0,0 +1,64 @@
+/*!
+\file gk_getopt.h
+\brief This file contains GNU's externs/structs/prototypes
+
+\date   Started 3/27/2007
+\author George
+\version\verbatim $Id: gk_getopt.h 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
+*/
+
+#ifndef _GK_GETOPT_H_
+#define _GK_GETOPT_H_
+
+
+/* Externals from getopt.c */
+extern char *gk_optarg;
+extern int gk_optind;
+extern int gk_opterr;
+extern int gk_optopt;
+
+
+/*! \brief Description of one long command-line option
+
+gk_getopt_long() and gk_getopt_long_only() take an array of these
+structures as their <tt>long_options</tt> argument, one element for each
+long option. The array must end with an element containing all zeros
+(the scan stops at the first element whose name is a null pointer).
+*/
+struct gk_option {
+  char *name;       /*!< The name of the option, without the leading dashes. */
+  int has_arg;      /*!< Whether the option takes an argument. It is an
+                         integer with three legitimate values: no_argument (0),
+                         required_argument (1) and optional_argument (2).
+                         */
+  int *flag;        /*!< Where to record the option, or a null pointer; see ::gk_option#val */
+  int val;          /*!< Together with flag, this field controls how the option
+                         is reported when it occurs.
+
+                         If flag is a null pointer, val is the value returned
+                         to the caller to identify this option. Often these
+                         values are chosen to be unique for each long option.
+
+                         If flag is not a null pointer, it should be the address
+                         of an int variable which is the flag for this option.
+                         val is then stored in that variable to indicate that
+                         the option was seen, and 0 is returned. */
+};
+
+/* Names for the values of the `has_arg' field of `struct gk_option'.  */
+#define no_argument		0
+#define required_argument	1
+#define optional_argument	2
+
+
+/* Function prototypes */
+extern int gk_getopt(int argc, char **argv, char *shortopts);
+extern int gk_getopt_long(int argc, char **argv, char *shortopts,
+              struct gk_option *longopts, int *longind);
+extern int gk_getopt_long_only (int argc, char **argv,
+              char *shortopts, struct gk_option *longopts, int *longind);
+
+
+
+#endif
+
diff --git a/gk_macros.h b/gk_macros.h
new file mode 100644
index 0000000..c3f1b45
--- /dev/null
+++ b/gk_macros.h
@@ -0,0 +1,169 @@
+/*!
+\file gk_macros.h
+\brief This file contains various macros
+
+\date   Started 3/27/2007
+\author George
+\version\verbatim $Id: gk_macros.h 15048 2013-08-31 19:38:14Z karypis $ \endverbatim
+*/
+
+#ifndef _GK_MACROS_H_
+#define _GK_MACROS_H_
+
+/*-------------------------------------------------------------
+ * Useful commands
+ *-------------------------------------------------------------*/
+#define gk_max(a, b) ((a) >= (b) ? (a) : (b))
+#define gk_min(a, b) ((a) >= (b) ? (b) : (a))
+#define gk_max3(a, b, c) ((a) >= (b) && (a) >= (c) ? (a) : ((b) >= (a) && (b) >= (c) ? (b) : (c)))
+#define gk_SWAP(a, b, tmp) do {(tmp) = (a); (a) = (b); (b) = (tmp);} while(0) 
+#define INC_DEC(a, b, val) do {(a) += (val); (b) -= (val);} while(0)
+#define sign(a, b) (((a) >= 0) ? (b) : -(b))  /* b when a >= 0, otherwise -b; arguments are parenthesized so that expressions expand correctly */
+
+#define ONEOVERRANDMAX (1.0/(RAND_MAX+1.0))
+#define RandomInRange(u) ((int) (ONEOVERRANDMAX*(u)*rand()))
+#define RandomInRange_r(s, u) ((int) (ONEOVERRANDMAX*(u)*rand_r(s)))
+
+#define gk_abs(x) ((x) >= 0 ? (x) : -(x))
+
+
+/*-------------------------------------------------------------
+ * Timing macros
+ *-------------------------------------------------------------*/
+#define gk_clearcputimer(tmr) (tmr = 0.0)
+#define gk_startcputimer(tmr) (tmr -= gk_CPUSeconds())
+#define gk_stopcputimer(tmr)  (tmr += gk_CPUSeconds())
+#define gk_getcputimer(tmr)   (tmr)
+
+#define gk_clearwctimer(tmr) (tmr = 0.0)
+#define gk_startwctimer(tmr) (tmr -= gk_WClockSeconds())
+#define gk_stopwctimer(tmr)  (tmr += gk_WClockSeconds())
+#define gk_getwctimer(tmr)   (tmr)
+
+/*-------------------------------------------------------------
+ * dbglvl handling macros: IFSET evaluates cmd only when (a)&(flag) is nonzero. It expands to a complete `if' statement including its `;', so avoid using it directly before an `else'.
+ *-------------------------------------------------------------*/
+#define IFSET(a, flag, cmd) if ((a)&(flag)) (cmd);
+
+
+/*-------------------------------------------------------------
+ * graceful library exit macro
+ *-------------------------------------------------------------*/
+#define GKSETJMP() (setjmp(gk_return_to_entry))
+#define gk_sigcatch() (setjmp(gk_jbufs[gk_cur_jbufs]))
+ 
+
+/*-------------------------------------------------------------
+ * Debugging memory leaks
+ *-------------------------------------------------------------*/
+#ifdef DMALLOC
+#   define MALLOC_CHECK(ptr)                                          \
+    if (malloc_verify((ptr)) == DMALLOC_VERIFY_ERROR) {  \
+        printf("***MALLOC_CHECK failed on line %d of file %s: " #ptr "\n", \
+              __LINE__, __FILE__);                               \
+        abort();                                                \
+    }
+#else
+#   define MALLOC_CHECK(ptr) ;
+#endif 
+
+
+/*-------------------------------------------------------------
+ * CSR conversion macros
+ *-------------------------------------------------------------*/
+#define MAKECSR(i, n, a) /* turns the counts in a[0..n-1] into prefix-sum offsets in a[0..n]; a must hold n+1 entries */ \
+   do { \
+     for ((i)=1; (i)<(n); (i)++) (a)[i] += (a)[(i)-1]; \
+     for ((i)=(n); (i)>0; (i)--) (a)[i] = (a)[(i)-1]; \
+     (a)[0] = 0; \
+   } while(0)
+
+#define SHIFTCSR(i, n, a) /* moves a[0..n-1] up one slot into a[1..n] and sets a[0] to 0 */ \
+   do { \
+     for ((i)=(n); (i)>0; (i)--) (a)[i] = (a)[(i)-1]; \
+     (a)[0] = 0; \
+   } while(0)
+
+
+/*-------------------------------------------------------------
+ * ASSERTS that cannot be turned off!
+ *-------------------------------------------------------------*/
+#define GKASSERT(expr)                                          \
+    if (!(expr)) {                                               \
+        printf("***ASSERTION failed on line %d of file %s: " #expr "\n", \
+              __LINE__, __FILE__);                               \
+        abort();                                                \
+    }
+
+#define GKASSERTP(expr,msg)                                          \
+    if (!(expr)) {                                               \
+        printf("***ASSERTION failed on line %d of file %s: " #expr "\n", \
+              __LINE__, __FILE__);                               \
+        printf msg ; \
+        printf("\n"); \
+        abort();                                                \
+    }
+
+#define GKCUASSERT(expr)                                          \
+    if (!(expr)) {                                               \
+        printf("***ASSERTION failed on line %d of file %s: " #expr "\n", \
+              __LINE__, __FILE__);                               \
+    }
+
+#define GKWARN(expr)                                          \
+    if (!(expr)) {                                               \
+        printf("***ASSERTION failed on line %d of file %s: " #expr "\n", \
+              __LINE__, __FILE__);                               \
+    }
+
+#define GKCUASSERTP(expr,msg)                                          \
+    if (!(expr)) {                                               \
+        printf("***ASSERTION failed on line %d of file %s: " #expr "\n", \
+              __LINE__, __FILE__);                               \
+        printf msg ; \
+        printf("\n"); \
+    }
+
+#define GKWARNP(expr,msg)                                          \
+    if (!(expr)) {                                               \
+        printf("***ASSERTION failed on line %d of file %s: " #expr "\n", \
+              __LINE__, __FILE__);                               \
+        printf msg ; \
+        printf("\n"); \
+    }
+
+
+/*-------------------------------------------------------------
+ * Program Assertions
+ *-------------------------------------------------------------*/
+#ifndef NDEBUG
+#   define ASSERT(expr)                                          \
+    if (!(expr)) {                                               \
+        printf("***ASSERTION failed on line %d of file %s: " #expr "\n", \
+              __LINE__, __FILE__);                               \
+        assert(expr);                                                \
+    }
+
+#   define ASSERTP(expr,msg)                                          \
+    if (!(expr)) {                                               \
+        printf("***ASSERTION failed on line %d of file %s: " #expr "\n", \
+              __LINE__, __FILE__);                               \
+        printf msg ; \
+        printf("\n"); \
+        assert(expr);                                                \
+    }
+#else
+#   define ASSERT(expr) ;
+#   define ASSERTP(expr,msg) ;
+#endif 
+
+#ifndef NDEBUG2
+#   define ASSERT2 ASSERT
+#   define ASSERTP2 ASSERTP
+#else
+#   define ASSERT2(expr) ;
+#   define ASSERTP2(expr,msg) ;
+#endif
+
+
+#endif
diff --git a/gk_mkblas.h b/gk_mkblas.h
new file mode 100644
index 0000000..1231669
--- /dev/null
+++ b/gk_mkblas.h
@@ -0,0 +1,203 @@
+/*!
+\file  gk_mkblas.h
+\brief Templates for BLAS-like routines
+
+\date   Started 3/28/07
+\author George
+\version\verbatim $Id: gk_mkblas.h 16304 2014-02-25 14:27:19Z karypis $ \endverbatim
+*/
+
+#ifndef _GK_MKBLAS_H_
+#define _GK_MKBLAS_H_
+
+
+#define GK_MKBLAS(PRFX, TYPE, OUTTYPE) \
+/*************************************************************************/\
+/*! gk_?incset(): stores baseval+i in x[i] for i in [0, n) and returns x */\
+/*************************************************************************/\
+TYPE *PRFX ## incset(size_t n, TYPE baseval, TYPE *x)\
+{\
+  size_t k;\
+\
+  for (k=n; k-- > 0; )\
+    x[k] = baseval+k;\
+\
+  return x;\
+}\
+\
+/*************************************************************************/\
+/*! gk_?max(): largest of the n elements x[0], x[incx], ...; 0 if n is 0 */\
+/*************************************************************************/\
+TYPE PRFX ## max(size_t n, TYPE *x, size_t incx)\
+{\
+  size_t k;\
+  TYPE best;\
+\
+  if (n == 0) return (TYPE) 0;\
+\
+  for (best=x[0], k=1; k<n; k++)\
+    if (x[k*incx] > best) best = x[k*incx];\
+\
+  return best;\
+}\
+\
+\
+/*************************************************************************/\
+/*! gk_?min(): smallest of the n elements x[0], x[incx], ...; 0 if n is 0 */\
+/*************************************************************************/\
+TYPE PRFX ## min(size_t n, TYPE *x, size_t incx)\
+{\
+  size_t k;\
+  TYPE best;\
+\
+  if (n == 0) return (TYPE) 0;\
+\
+  for (best=x[0], k=1; k<n; k++)\
+    if (x[k*incx] < best) best = x[k*incx];\
+\
+  return best;\
+}\
+\
+\
+/*************************************************************************/\
+/*! gk_?argmax(): element number (offset/incx) of the first largest value */\
+/*************************************************************************/\
+size_t PRFX ## argmax(size_t n, TYPE *x, size_t incx)\
+{\
+  size_t k, pos, best=0;\
+\
+  for (k=1, pos=incx; k<n; k++, pos+=incx)\
+    if (x[pos] > x[best]) best = pos;\
+\
+  return (size_t)(best/incx);\
+}\
+\
+\
+/*************************************************************************/\
+/*! gk_?argmin(): element number (offset/incx) of the first smallest value */\
+/*************************************************************************/\
+size_t PRFX ## argmin(size_t n, TYPE *x, size_t incx)\
+{\
+  size_t k, pos, best=0;\
+\
+  for (k=1, pos=incx; k<n; k++, pos+=incx)\
+    if (x[pos] < x[best]) best = pos;\
+\
+  return (size_t)(best/incx);\
+}\
+\
+\
+/*************************************************************************/\
+/*! gk_?argmax_n(): element number found at rank k (1-based) after kvsortd */\
+/*************************************************************************/\
+size_t PRFX ## argmax_n(size_t n, TYPE *x, size_t incx, size_t k)\
+{\
+  size_t i, j, max_n;\
+  PRFX ## kv_t *cand;\
+\
+  cand = PRFX ## kvmalloc(n, "GK_ARGMAX_N: cand");\
+\
+  for (i=0, j=0; i<n; i++, j+=incx) { /* key = element value, val = its number */\
+    cand[i].val = i;\
+    cand[i].key = x[j];\
+  }\
+  PRFX ## kvsortd(n, cand); /* presumably sorts by decreasing key - TODO confirm */\
+\
+  max_n = cand[k-1].val; /* NOTE(review): nothing checks that 1 <= k <= n */\
+\
+  gk_free((void *)&cand, LTERM);\
+\
+  return max_n;\
+}\
+\
+\
+/*************************************************************************/\
+/*! gk_?sum(): adds n elements of x taken incx apart, accumulating in OUTTYPE */\
+/**************************************************************************/\
+OUTTYPE PRFX ## sum(size_t n, TYPE *x, size_t incx)\
+{\
+  size_t k;\
+  OUTTYPE total = 0;\
+\
+  for (k=0; k<n; k++)\
+    total += x[k*incx];\
+\
+  return total;\
+}\
+\
+\
+/*************************************************************************/\
+/*! gk_?scale(): multiplies n elements of x (incx apart) by alpha; returns x */\
+/**************************************************************************/\
+TYPE *PRFX ## scale(size_t n, TYPE alpha, TYPE *x, size_t incx)\
+{\
+  size_t i;\
+\
+  for (i=0; i<n; i++) /* index instead of advancing x, so x stays at the start */\
+    x[i*incx] *= alpha;\
+\
+  return x;\
+}\
+\
+\
+/*************************************************************************/\
+/*! gk_?norm2(): square root of the sum of squares of n elements of x */\
+/**************************************************************************/\
+OUTTYPE PRFX ## norm2(size_t n, TYPE *x, size_t incx)\
+{\
+  size_t k;\
+  OUTTYPE sumsq = 0;\
+\
+  for (k=0; k<n; k++)\
+    sumsq += x[k*incx] * x[k*incx];\
+\
+  return (sumsq > 0 ? (OUTTYPE)sqrt((double)sumsq) : (OUTTYPE)0);\
+}\
+\
+\
+/*************************************************************************/\
+/*! gk_?dot(): sum of x[i*incx]*y[i*incy] for i in [0, n), kept in OUTTYPE */\
+/**************************************************************************/\
+OUTTYPE PRFX ## dot(size_t n, TYPE *x, size_t incx, TYPE *y, size_t incy)\
+{\
+  size_t k;\
+  OUTTYPE acc = 0.0;\
+\
+  for (k=0; k<n; k++)\
+    acc += x[k*incx] * y[k*incy];\
+\
+  return acc;\
+}\
+\
+\
+/*************************************************************************/\
+/*! gk_?axpy(): y[i*incy] += alpha*x[i*incx] for i in [0, n); returns y */\
+/**************************************************************************/\
+TYPE *PRFX ## axpy(size_t n, TYPE alpha, TYPE *x, size_t incx, TYPE *y, size_t incy)\
+{\
+  size_t k;\
+\
+  for (k=0; k<n; k++)\
+    y[k*incy] += alpha*x[k*incx];\
+\
+  /* y was only indexed, never advanced, so it is still the start of the vector */\
+  return y;\
+}\
+
+
+
+#define GK_MKBLAS_PROTO(PRFX, TYPE, OUTTYPE) /* prototypes of the GK_MKBLAS() routines */ \
+  TYPE    *PRFX ## incset(size_t n, TYPE baseval, TYPE *x);\
+  TYPE     PRFX ## max(size_t n, TYPE *x, size_t incx);\
+  TYPE     PRFX ## min(size_t n, TYPE *x, size_t incx);\
+  size_t   PRFX ## argmax(size_t n, TYPE *x, size_t incx);\
+  size_t   PRFX ## argmin(size_t n, TYPE *x, size_t incx);\
+  size_t   PRFX ## argmax_n(size_t n, TYPE *x, size_t incx, size_t k);\
+  OUTTYPE  PRFX ## sum(size_t n, TYPE *x, size_t incx);\
+  TYPE    *PRFX ## scale(size_t n, TYPE alpha, TYPE *x, size_t incx);\
+  OUTTYPE  PRFX ## norm2(size_t n, TYPE *x, size_t incx);\
+  OUTTYPE  PRFX ## dot(size_t n, TYPE *x, size_t incx, TYPE *y, size_t incy);\
+  TYPE    *PRFX ## axpy(size_t n, TYPE alpha, TYPE *x, size_t incx, TYPE *y, size_t incy);\
+
+
+#endif
diff --git a/gk_mkmemory.h b/gk_mkmemory.h
new file mode 100644
index 0000000..78e216e
--- /dev/null
+++ b/gk_mkmemory.h
@@ -0,0 +1,142 @@
+/*!
+\file  gk_mkmemory.h
+\brief Templates for memory allocation routines
+
+\date   Started 3/29/07
+\author George
+\version\verbatim $Id: gk_mkmemory.h 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
+*/
+
+#ifndef _GK_MKMEMORY_H_
+#define _GK_MKMEMORY_H_
+
+
+#define GK_MKALLOC(PRFX, TYPE)\
+/*************************************************************************/\
+/*! gk_?malloc(): room for n TYPE elements, obtained through gk_malloc() */\
+/**************************************************************************/\
+TYPE *PRFX ## malloc(size_t n, char *msg)\
+{\
+  return (TYPE *)gk_malloc(n*sizeof(TYPE), msg);\
+}\
+\
+\
+/*************************************************************************/\
+/*! gk_?realloc(): resizes ptr to n TYPE elements through gk_realloc() */\
+/**************************************************************************/\
+TYPE *PRFX ## realloc(TYPE *ptr, size_t n, char *msg)\
+{\
+  return (TYPE *)gk_realloc((void *)ptr, n*sizeof(TYPE), msg);\
+}\
+\
+\
+/*************************************************************************/\
+/*! gk_?smalloc(): allocates n TYPE elements and sets each of them to ival */\
+/**************************************************************************/\
+TYPE *PRFX ## smalloc(size_t n, TYPE ival, char *msg)\
+{\
+  TYPE *buf = (TYPE *)gk_malloc(n*sizeof(TYPE), msg);\
+\
+  /* hand an allocation failure straight back to the caller */\
+  if (!buf) \
+    return NULL; \
+\
+  return PRFX ## set(n, ival, buf); \
+}\
+\
+\
+/*************************************************************************/\
+/*! gk_?set(): writes val into x[0..n) and returns x */\
+/*************************************************************************/\
+TYPE *PRFX ## set(size_t n, TYPE val, TYPE *x)\
+{\
+  size_t k;\
+\
+  for (k=n; k-- > 0; )\
+    x[k] = val;\
+\
+  return x;\
+}\
+\
+\
+/*************************************************************************/\
+/*! The macro for gk_?copy()-class of routines: n elements from a into b */\
+/*************************************************************************/\
+TYPE *PRFX ## copy(size_t n, TYPE *a, TYPE *b)\
+{\
+  return (TYPE *)memmove((void *)b, (void *)a, sizeof(TYPE)*n);\
+}\
+\
+\
+/*************************************************************************/\
+/*! gk_?AllocMatrix(): ndim1 rows of ndim2 elements set to value, or NULL */\
+/**************************************************************************/\
+TYPE **PRFX ## AllocMatrix(size_t ndim1, size_t ndim2, TYPE value, char *errmsg)\
+{\
+  size_t i, j; /* same type as the dimensions they are compared against */\
+  TYPE **matrix;\
+\
+  matrix = (TYPE **)gk_malloc(ndim1*sizeof(TYPE *), errmsg);\
+  if (matrix == NULL) \
+    return NULL;\
+\
+  for (i=0; i<ndim1; i++) { \
+    matrix[i] = PRFX ## smalloc(ndim2, value, errmsg);\
+    if (matrix[i] == NULL) { /* undo everything allocated so far */\
+      for (j=0; j<i; j++) \
+        gk_free((void **)&matrix[j], LTERM); \
+      gk_free((void **)&matrix, LTERM); /* the row-pointer array as well */\
+      return NULL; \
+    } \
+  }\
+  return matrix;\
+}\
+\
+\
+/*************************************************************************/\
+/*! The macro for gk_?FreeMatrix()-class of routines; ndim2 is not used */\
+/**************************************************************************/\
+void PRFX ## FreeMatrix(TYPE ***r_matrix, size_t ndim1, size_t ndim2)\
+{\
+  gk_idx_t i;\
+  TYPE **matrix;\
+\
+  if (*r_matrix == NULL) \
+    return; \
+\
+  matrix = *r_matrix;\
+\
+  for (i=0; i<ndim1; i++) /* release each row */\
+    gk_free((void **)&(matrix[i]), LTERM);\
+\
+  gk_free((void **)r_matrix, LTERM); /* then the array of row pointers */\
+}\
+\
+\
+/*************************************************************************/\
+/*! gk_?SetMatrix(): stores value in every entry of an ndim1 x ndim2 matrix */\
+/**************************************************************************/\
+void PRFX ## SetMatrix(TYPE **matrix, size_t ndim1, size_t ndim2, TYPE value)\
+{\
+  gk_idx_t row, col;\
+\
+  for (row=0; row<ndim1; row++) {\
+    for (col=0; col<ndim2; col++)\
+      matrix[row][col] = value;\
+  }\
+}\
+
+
+#define GK_MKALLOC_PROTO(PRFX, TYPE) /* prototypes of the GK_MKALLOC() routines */\
+  TYPE  *PRFX ## malloc(size_t n, char *msg);\
+  TYPE  *PRFX ## realloc(TYPE *ptr, size_t n, char *msg);\
+  TYPE  *PRFX ## smalloc(size_t n, TYPE ival, char *msg);\
+  TYPE  *PRFX ## set(size_t n, TYPE val, TYPE *x);\
+  TYPE  *PRFX ## copy(size_t n, TYPE *a, TYPE *b);\
+  TYPE **PRFX ## AllocMatrix(size_t ndim1, size_t ndim2, TYPE value, char *errmsg);\
+  void   PRFX ## FreeMatrix(TYPE ***r_matrix, size_t ndim1, size_t ndim2);\
+  void   PRFX ## SetMatrix(TYPE **matrix, size_t ndim1, size_t ndim2, TYPE value);\
+
+
+
+#endif
diff --git a/gk_mkpqueue.h b/gk_mkpqueue.h
new file mode 100644
index 0000000..50a5385
--- /dev/null
+++ b/gk_mkpqueue.h
@@ -0,0 +1,440 @@
+/*!
+\file  gk_mkpqueue.h
+\brief Templates for priority queues
+
+\date   Started 4/09/07
+\author George
+\version\verbatim $Id: gk_mkpqueue.h 21742 2018-01-26 16:59:15Z karypis $ \endverbatim
+*/
+
+
+#ifndef _GK_MKPQUEUE_H
+#define _GK_MKPQUEUE_H
+
+
+#define GK_MKPQUEUE(FPRFX, PQT, KVT, KT, VT, KVMALLOC, KMAX, KEY_LT)\
+/*************************************************************************/\
+/*! Creates a queue for up to maxnodes items; NULL if the struct allocation fails */\
+/**************************************************************************/\
+PQT *FPRFX ## Create(size_t maxnodes)\
+{\
+  PQT *queue; \
+\
+  queue = (PQT *)gk_malloc(sizeof(PQT), "gk_pqCreate: queue");\
+  if (queue != NULL) /* Init dereferences queue, so skip it on failure */\
+    FPRFX ## Init(queue, maxnodes);\
+  return queue;\
+}\
+\
+\
+/*************************************************************************/\
+/*! Initializes an existing queue struct: empty heap, locator[] all -1 */\
+/**************************************************************************/\
+void FPRFX ## Init(PQT *queue, size_t maxnodes)\
+{\
+  queue->nnodes = 0;\
+  queue->maxnodes = maxnodes;\
+\
+  queue->heap    = KVMALLOC(maxnodes, "gk_PQInit: heap");\
+  queue->locator = gk_idxsmalloc(maxnodes, -1, "gk_PQInit: locator"); /* -1 = not queued */\
+}\
+\
+\
+/*************************************************************************/\
+/*! Empties the queue: clears the locator entry of each queued item */\
+/**************************************************************************/\
+void FPRFX ## Reset(PQT *queue)\
+{\
+  ssize_t slot = queue->nnodes;\
+  ssize_t *locator=queue->locator;\
+  KVT *heap=queue->heap;\
+\
+  while (slot-- > 0)\
+    locator[heap[slot].val] = -1;\
+  queue->nnodes = 0;\
+}\
+\
+\
+/*************************************************************************/\
+/*! Frees heap[] and locator[] but not the queue struct; NULL queue is a no-op */\
+/**************************************************************************/\
+void FPRFX ## Free(PQT *queue)\
+{\
+  if (queue == NULL) return;\
+  gk_free((void **)&queue->heap, &queue->locator, LTERM);\
+  queue->maxnodes = 0;\
+}\
+\
+\
+/*************************************************************************/\
+/*! This function frees the internal datastructures of the priority queue \
+    and the queue itself; the caller's own pointer is left unchanged */\
+/**************************************************************************/\
+void FPRFX ## Destroy(PQT *queue)\
+{\
+  if (queue == NULL) return;\
+  FPRFX ## Free(queue);\
+  gk_free((void **)&queue, LTERM); /* &queue is the local copy of the pointer */\
+}\
+\
+\
+/*************************************************************************/\
+/*! This function returns the number of items currently in the queue */\
+/**************************************************************************/\
+size_t FPRFX ## Length(PQT *queue)\
+{\
+  return queue->nnodes;\
+}\
+\
+\
+/*************************************************************************/\
+/*! Adds item node with priority key; node must not be queued. Returns 0 */\
+/**************************************************************************/\
+int FPRFX ## Insert(PQT *queue, VT node, KT key)\
+{\
+  ssize_t i, j;\
+  ssize_t *locator=queue->locator;\
+  KVT *heap=queue->heap;\
+\
+  ASSERT2(FPRFX ## CheckHeap(queue));\
+\
+  ASSERT(locator[node] == -1);\
+\
+  i = queue->nnodes++; /* NOTE(review): nnodes is not checked against maxnodes */\
+  while (i > 0) { /* sift up: pull parents down while key precedes them */\
+    j = (i-1)>>1; /* parent of slot i */\
+    if (KEY_LT(key, heap[j].key)) {\
+      heap[i] = heap[j];\
+      locator[heap[i].val] = i;\
+      i = j;\
+    }\
+    else\
+      break;\
+  }\
+  ASSERT(i >= 0);\
+  heap[i].key   = key;\
+  heap[i].val   = node;\
+  locator[node] = i;\
+\
+  ASSERT2(FPRFX ## CheckHeap(queue));\
+\
+  return 0;\
+}\
+\
+\
+/*************************************************************************/\
+/*! Removes item node from the queue; node must be queued. Returns 0 */\
+/**************************************************************************/\
+int FPRFX ## Delete(PQT *queue, VT node)\
+{\
+  ssize_t i, j;\
+  size_t nnodes;\
+  KT newkey, oldkey;\
+  ssize_t *locator=queue->locator;\
+  KVT *heap=queue->heap;\
+\
+  ASSERT(locator[node] != -1);\
+  ASSERT(heap[locator[node]].val == node);\
+\
+  ASSERT2(FPRFX ## CheckHeap(queue));\
+\
+  i = locator[node]; /* the slot being vacated */\
+  locator[node] = -1;\
+\
+  if (--queue->nnodes > 0 && heap[queue->nnodes].val != node) {\
+    node   = heap[queue->nnodes].val; /* from here on: the last heap entry, */\
+    newkey = heap[queue->nnodes].key; /* which is re-homed starting at slot i */\
+    oldkey = heap[i].key;\
+\
+    if (KEY_LT(newkey, oldkey)) { /* Filter-up */\
+      while (i > 0) {\
+        j = (i-1)>>1; /* parent of slot i */\
+        if (KEY_LT(newkey, heap[j].key)) {\
+          heap[i] = heap[j];\
+          locator[heap[i].val] = i;\
+          i = j;\
+        }\
+        else\
+          break;\
+      }\
+    }\
+    else { /* Filter down */\
+      nnodes = queue->nnodes;\
+      while ((j=(i<<1)+1) < nnodes) { /* j = left child of slot i */\
+        if (KEY_LT(heap[j].key, newkey)) {\
+          if (j+1 < nnodes && KEY_LT(heap[j+1].key, heap[j].key))\
+            j++; /* the right child precedes the left one */\
+          heap[i] = heap[j];\
+          locator[heap[i].val] = i;\
+          i = j;\
+        }\
+        else if (j+1 < nnodes && KEY_LT(heap[j+1].key, newkey)) {\
+          j++;\
+          heap[i] = heap[j];\
+          locator[heap[i].val] = i;\
+          i = j;\
+        }\
+        else\
+          break;\
+      }\
+    }\
+\
+    heap[i].key   = newkey;\
+    heap[i].val   = node;\
+    locator[node] = i;\
+  }\
+\
+  ASSERT2(FPRFX ## CheckHeap(queue));\
+\
+  return 0;\
+}\
+\
+\
+/*************************************************************************/\
+/*! Gives queued item node the key newkey and restores the heap order */ \
+/**************************************************************************/\
+void FPRFX ## Update(PQT *queue, VT node, KT newkey)\
+{\
+  ssize_t i, j;\
+  size_t nnodes;\
+  KT oldkey;\
+  ssize_t *locator=queue->locator;\
+  KVT *heap=queue->heap;\
+\
+  ASSERT(locator[node] != -1); /* check before locator[node] indexes heap[] */\
+  ASSERT(heap[locator[node]].val == node);\
+\
+  oldkey = heap[locator[node]].key;\
+  if (!KEY_LT(newkey, oldkey) && !KEY_LT(oldkey, newkey)) return; /* key unchanged */\
+  ASSERT2(FPRFX ## CheckHeap(queue));\
+\
+  i = locator[node];\
+\
+  if (KEY_LT(newkey, oldkey)) { /* Filter-up */\
+    while (i > 0) {\
+      j = (i-1)>>1; /* parent of slot i */\
+      if (KEY_LT(newkey, heap[j].key)) {\
+        heap[i] = heap[j];\
+        locator[heap[i].val] = i;\
+        i = j;\
+      }\
+      else\
+        break;\
+    }\
+  }\
+  else { /* Filter down */\
+    nnodes = queue->nnodes;\
+    while ((j=(i<<1)+1) < nnodes) { /* j = left child of slot i */\
+      if (KEY_LT(heap[j].key, newkey)) {\
+        if (j+1 < nnodes && KEY_LT(heap[j+1].key, heap[j].key))\
+          j++; /* the right child precedes the left one */\
+        heap[i] = heap[j];\
+        locator[heap[i].val] = i;\
+        i = j;\
+      }\
+      else if (j+1 < nnodes && KEY_LT(heap[j+1].key, newkey)) {\
+        j++;\
+        heap[i] = heap[j];\
+        locator[heap[i].val] = i;\
+        i = j;\
+      }\
+      else\
+        break;\
+    }\
+  }\
+\
+  heap[i].key   = newkey;\
+  heap[i].val   = node;\
+  locator[node] = i;\
+\
+  ASSERT2(FPRFX ## CheckHeap(queue));\
+\
+  return;\
+}\
+\
+\
+/*************************************************************************/\
+/*! This function returns the item at the top of the queue and removes\
+    it from the priority queue. Returns -1 when the queue is empty. */\
+/**************************************************************************/\
+VT FPRFX ## GetTop(PQT *queue)\
+{\
+  ssize_t i, j;\
+  ssize_t *locator;\
+  KVT *heap;\
+  VT vtx, node;\
+  KT key;\
+\
+  ASSERT2(FPRFX ## CheckHeap(queue));\
+\
+  if (queue->nnodes == 0)\
+    return -1;\
+\
+  queue->nnodes--;\
+\
+  heap    = queue->heap;\
+  locator = queue->locator;\
+\
+  vtx = heap[0].val; /* the item handed back to the caller */\
+  locator[vtx] = -1;\
+\
+  if ((i = queue->nnodes) > 0) {\
+    key  = heap[i].key; /* last heap entry, to be sifted down from the root */\
+    node = heap[i].val;\
+    i = 0;\
+    while ((j=2*i+1) < queue->nnodes) { /* j = left child of slot i */\
+      if (KEY_LT(heap[j].key, key)) {\
+        if (j+1 < queue->nnodes && KEY_LT(heap[j+1].key, heap[j].key))\
+          j = j+1; /* the right child precedes the left one */\
+        heap[i] = heap[j];\
+        locator[heap[i].val] = i;\
+        i = j;\
+      }\
+      else if (j+1 < queue->nnodes && KEY_LT(heap[j+1].key, key)) {\
+        j = j+1;\
+        heap[i] = heap[j];\
+        locator[heap[i].val] = i;\
+        i = j;\
+      }\
+      else\
+        break;\
+    }\
+\
+    heap[i].key   = key;\
+    heap[i].val   = node;\
+    locator[node] = i;\
+  }\
+\
+  ASSERT2(FPRFX ## CheckHeap(queue));\
+  return vtx;\
+}\
+\
+\
+/*************************************************************************/\
+/*! This function returns the item at the top of the queue. The item is not\
+    deleted from the queue. Returns -1 when the queue is empty. */\
+/**************************************************************************/\
+VT FPRFX ## SeeTopVal(PQT *queue)\
+{\
+  return (queue->nnodes == 0 ? -1 : queue->heap[0].val);\
+}\
+\
+\
+/*************************************************************************/\
+/*! This function returns the key of the top item. The item is not\
+    deleted from the queue. Returns KMAX when the queue is empty. */\
+/**************************************************************************/\
+KT FPRFX ## SeeTopKey(PQT *queue)\
+{\
+  return (queue->nnodes == 0 ? KMAX : queue->heap[0].key);\
+}\
+\
+\
+/*************************************************************************/\
+/*! Returns the key stored for item node; does not check that it is queued */\
+/**************************************************************************/\
+KT FPRFX ## SeeKey(PQT *queue, VT node)\
+{\
+  KVT *heap = queue->heap;\
+  ssize_t *locator = queue->locator;\
+  ssize_t slot;\
+\
+  slot = locator[node]; /* heap position recorded for this item */\
+\
+  return heap[slot].key;\
+}\
+\
+\
+/*************************************************************************/\
+/*! This function returns the first item in a breadth-first traversal of\
+    the heap whose key is less than maxwgt. This function is here due to\
+    hMETIS and is not general!*/\
+/**************************************************************************/\
+/*\
+VT FPRFX ## SeeConstraintTop(PQT *queue, KT maxwgt, KT *wgts)\
+{\
+  ssize_t i;\
+\
+  if (queue->nnodes == 0)\
+    return -1;\
+\
+  if (maxwgt <= 1000)\
+    return FPRFX ## SeeTopVal(queue);\
+\
+  for (i=0; i<queue->nnodes; i++) {\
+    if (queue->heap[i].key > 0) {\
+      if (wgts[queue->heap[i].val] <= maxwgt)\
+        return queue->heap[i].val;\
+    }\
+    else {\
+      if (queue->heap[i/2].key <= 0)\
+        break;\
+    }\
+  }\
+\
+  return queue->heap[0].val;\
+\
+}\
+*/\
+\
+\
+/*************************************************************************/\
+/*! Checks heap order and heap/locator agreement via ASSERT; returns 1 */\
+/**************************************************************************/\
+int FPRFX ## CheckHeap(PQT *queue)\
+{\
+  ssize_t i, j;\
+  size_t nnodes;\
+  ssize_t *locator;\
+  KVT *heap;\
+\
+  heap    = queue->heap;\
+  locator = queue->locator;\
+  nnodes  = queue->nnodes;\
+\
+  if (nnodes == 0)\
+    return 1;\
+\
+  ASSERT(locator[heap[0].val] == 0);\
+  for (i=1; i<nnodes; i++) {\
+    ASSERT(locator[heap[i].val] == i); /* locator points back at the slot */\
+    ASSERT(!KEY_LT(heap[i].key, heap[(i-1)/2].key)); /* never precedes its parent */\
+  }\
+  for (i=1; i<nnodes; i++)\
+    ASSERT(!KEY_LT(heap[i].key, heap[0].key)); /* nothing precedes the root */\
+\
+  for (j=i=0; i<queue->maxnodes; i++) { /* j counts the items marked as queued */\
+    if (locator[i] != -1)\
+      j++;\
+  }\
+  ASSERTP(j == nnodes, ("%jd %jd\n", (intmax_t)j, (intmax_t)nnodes));\
+\
+  return 1;\
+}\
+
+
+#define GK_MKPQUEUE_PROTO(FPRFX, PQT, KT, VT) /* prototypes of the GK_MKPQUEUE() routines */\
+  PQT *  FPRFX ## Create(size_t maxnodes);\
+  void   FPRFX ## Init(PQT *queue, size_t maxnodes);\
+  void   FPRFX ## Reset(PQT *queue);\
+  void   FPRFX ## Free(PQT *queue);\
+  void   FPRFX ## Destroy(PQT *queue);\
+  size_t FPRFX ## Length(PQT *queue);\
+  int    FPRFX ## Insert(PQT *queue, VT node, KT key);\
+  int    FPRFX ## Delete(PQT *queue, VT node);\
+  void   FPRFX ## Update(PQT *queue, VT node, KT newkey);\
+  VT     FPRFX ## GetTop(PQT *queue);\
+  VT     FPRFX ## SeeTopVal(PQT *queue);\
+  KT     FPRFX ## SeeTopKey(PQT *queue);\
+  KT     FPRFX ## SeeKey(PQT *queue, VT node);\
+  VT     FPRFX ## SeeConstraintTop(PQT *queue, KT maxwgt, KT *wgts); /* body is commented out in GK_MKPQUEUE */\
+  int    FPRFX ## CheckHeap(PQT *queue);\
+
+
+/* This is how these macros are used (key_lt(a, b) stands for the caller's key comparison macro)
+GK_MKPQUEUE(gk_dkvPQ, gk_dkvPQ_t, gk_dkv_t, double, gk_idx_t, gk_dkvmalloc, DBL_MAX, key_lt)
+GK_MKPQUEUE_PROTO(gk_dkvPQ, gk_dkvPQ_t, double, gk_idx_t)
+*/
+
+
+#endif
diff --git a/gk_mkpqueue2.h b/gk_mkpqueue2.h
new file mode 100644
index 0000000..10e8ee4
--- /dev/null
+++ b/gk_mkpqueue2.h
@@ -0,0 +1,215 @@
+/*!
+\file  gk_mkpqueue2.h
+\brief Templates for priority queues that do not utilize locators and as such
+       they can use different types of values.
+
+\date   Started 4/09/07
+\author George
+\version\verbatim $Id: gk_mkpqueue2.h 13005 2012-10-23 22:34:36Z karypis $ \endverbatim
+*/
+
+
+#ifndef _GK_MKPQUEUE2_H
+#define _GK_MKPQUEUE2_H
+
+
+#define GK_MKPQUEUE2(FPRFX, PQT, KT, VT, KMALLOC, VMALLOC, KMAX, KEY_LT)\
+/*************************************************************************/\
+/*! Creates a queue holding up to maxnodes items; NULL if an allocation fails */\
+/**************************************************************************/\
+PQT *FPRFX ## Create2(ssize_t maxnodes)\
+{\
+  PQT *queue; \
+\
+  if ((queue = (PQT *)gk_malloc(sizeof(PQT), "gk_pqCreate2: queue")) != NULL) {\
+    memset(queue, 0, sizeof(PQT));\
+    queue->nnodes   = 0;\
+    queue->maxnodes = maxnodes;\
+    queue->keys     = KMALLOC(maxnodes, "gk_pqCreate2: keys");\
+    queue->vals     = VMALLOC(maxnodes, "gk_pqCreate2: vals");\
+\
+    if (queue->keys == NULL || queue->vals == NULL) /* partial failure: release all */\
+      gk_free((void **)&queue->keys, &queue->vals, &queue, LTERM);\
+  }\
+\
+  return queue;\
+}\
+\
+\
+/*************************************************************************/\
+/*! This function resets the priority queue by setting its item count to 0 */\
+/**************************************************************************/\
+void FPRFX ## Reset2(PQT *queue)\
+{\
+  queue->nnodes = 0;\
+}\
+\
+\
+/*************************************************************************/\
+/*! Frees keys[], vals[] and the queue itself, then sets *r_queue to NULL */\
+/**************************************************************************/\
+void FPRFX ## Destroy2(PQT **r_queue)\
+{\
+  PQT *queue = *r_queue; \
+  if (queue == NULL) return;\
+  gk_free((void **)&queue->keys, &queue->vals, &queue, LTERM);\
+  *r_queue = NULL;\
+}\
+\
+\
+/*************************************************************************/\
+/*! This function returns the number of items currently in the queue */\
+/**************************************************************************/\
+size_t FPRFX ## Length2(PQT *queue)\
+{\
+  return queue->nnodes;\
+}\
+\
+\
+/*************************************************************************/\
+/*! Adds (val, key) to the queue. Returns 1, or 0 when the queue is full. */\
+/**************************************************************************/\
+int FPRFX ## Insert2(PQT *queue, VT val, KT key)\
+{\
+  ssize_t i, j;\
+  KT *keys=queue->keys;\
+  VT *vals=queue->vals;\
+\
+  ASSERT2(FPRFX ## CheckHeap2(queue));\
+\
+  if (queue->nnodes == queue->maxnodes) \
+    return 0;\
+\
+  ASSERT2(FPRFX ## CheckHeap2(queue)); /* repeats the check made above */\
+\
+  i = queue->nnodes++; /* start at the first unused slot */\
+  while (i > 0) { /* sift up: pull parents down while key precedes them */\
+    j = (i-1)>>1; /* parent of slot i */\
+    if (KEY_LT(key, keys[j])) {\
+      keys[i] = keys[j];\
+      vals[i] = vals[j];\
+      i = j;\
+    }\
+    else\
+      break;\
+  }\
+  ASSERT(i >= 0);\
+  keys[i] = key;\
+  vals[i] = val;\
+\
+  ASSERT2(FPRFX ## CheckHeap2(queue));\
+\
+  return 1;\
+}\
+\
+\
+/*************************************************************************/\
+/*! This function stores the item at the top of the queue in *r_val and\
+    removes it. Returns 1, or 0 (with *r_val untouched) if the queue is empty */\
+/**************************************************************************/\
+int FPRFX ## GetTop2(PQT *queue, VT *r_val)\
+{\
+  ssize_t i, j;\
+  KT key, *keys=queue->keys;\
+  VT val, *vals=queue->vals;\
+\
+  ASSERT2(FPRFX ## CheckHeap2(queue));\
+\
+  if (queue->nnodes == 0)\
+    return 0;\
+\
+  queue->nnodes--;\
+\
+  *r_val = vals[0];\
+\
+  if ((i = queue->nnodes) > 0) {\
+    key = keys[i]; /* last heap entry, to be sifted down from the root */\
+    val = vals[i];\
+    i = 0;\
+    while ((j=2*i+1) < queue->nnodes) { /* j = left child of slot i */\
+      if (KEY_LT(keys[j], key)) {\
+        if (j+1 < queue->nnodes && KEY_LT(keys[j+1], keys[j]))\
+          j = j+1; /* the right child precedes the left one */\
+        keys[i] = keys[j];\
+        vals[i] = vals[j];\
+        i = j;\
+      }\
+      else if (j+1 < queue->nnodes && KEY_LT(keys[j+1], key)) {\
+        j = j+1;\
+        keys[i] = keys[j];\
+        vals[i] = vals[j];\
+        i = j;\
+      }\
+      else\
+        break;\
+    }\
+\
+    keys[i] = key;\
+    vals[i] = val;\
+  }\
+\
+  ASSERT2(FPRFX ## CheckHeap2(queue));\
+\
+  return 1;\
+}\
+\
+\
+/*************************************************************************/\
+/*! Copies the top item into *r_val without removing it. Returns 1, or 0\
+    (leaving *r_val untouched) when the queue is empty. */\
+/**************************************************************************/\
+int FPRFX ## SeeTopVal2(PQT *queue, VT *r_val)\
+{\
+  int nonempty = (queue->nnodes != 0);\
+\
+  if (nonempty)\
+    *r_val = queue->vals[0];\
+\
+  return nonempty;\
+}\
+\
+\
+/*************************************************************************/\
+/*! This function returns the key of the top item. The item is not\
+    deleted from the queue. Returns KMAX when the queue is empty. */\
+/**************************************************************************/\
+KT FPRFX ## SeeTopKey2(PQT *queue)\
+{\
+  return (queue->nnodes == 0 ? KMAX : queue->keys[0]);\
+}\
+\
+\
+/*************************************************************************/\
+/*! Checks the heap order of keys[] via ASSERT; always returns 1 */\
+/**************************************************************************/\
+int FPRFX ## CheckHeap2(PQT *queue)\
+{\
+  ssize_t i;\
+  KT *keys=queue->keys;\
+\
+  if (queue->nnodes == 0)\
+    return 1;\
+\
+  for (i=1; i<queue->nnodes; i++) {\
+    ASSERT(!KEY_LT(keys[i], keys[(i-1)/2])); /* never precedes its parent */\
+  }\
+  for (i=1; i<queue->nnodes; i++)\
+    ASSERT(!KEY_LT(keys[i], keys[0])); /* nothing precedes the root */\
+\
+  return 1;\
+}\
+
+
+#define GK_MKPQUEUE2_PROTO(FPRFX, PQT, KT, VT) /* prototypes of the GK_MKPQUEUE2() routines */\
+  PQT *  FPRFX ## Create2(ssize_t maxnodes);\
+  void   FPRFX ## Reset2(PQT *queue);\
+  void   FPRFX ## Destroy2(PQT **r_queue);\
+  size_t FPRFX ## Length2(PQT *queue);\
+  int    FPRFX ## Insert2(PQT *queue, VT node, KT key);\
+  int    FPRFX ## GetTop2(PQT *queue, VT *r_val);\
+  int    FPRFX ## SeeTopVal2(PQT *queue, VT *r_val);\
+  KT     FPRFX ## SeeTopKey2(PQT *queue);\
+  int    FPRFX ## CheckHeap2(PQT *queue);\
+
+
+#endif
diff --git a/gk_mkrandom.h b/gk_mkrandom.h
new file mode 100644
index 0000000..68d54fa
--- /dev/null
+++ b/gk_mkrandom.h
@@ -0,0 +1,123 @@
+/*!
+\file  
+\brief Templates for portable random number generation
+
+\date   Started 5/17/07
+\author George
+\version\verbatim $Id: gk_mkrandom.h 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
+*/
+
+
+#ifndef _GK_MKRANDOM_H
+#define _GK_MKRANDOM_H
+
+/*************************************************************************/
+/*! The generator for the rand() related routines.
+   \param RNGT  the datatype that defines the range of values over which
+                random numbers will be generated
+   \param VALT  the datatype that defines the contents of the array to
+                be permuted by randArrayPermute()
+   \param FPRFX the function prefix
+*/
+/**************************************************************************/
+#define GK_MKRANDOM(FPRFX, RNGT, VALT)\
+/*************************************************************************/\
+/*! Initializes the generator by passing seed (as uint64_t) to gk_randinit() */ \
+/**************************************************************************/\
+void FPRFX ## srand(RNGT seed) \
+{\
+  gk_randinit((uint64_t) seed);\
+}\
+\
+\
+/*************************************************************************/\
+/*! Returns a random number: gk_randint64() if RNGT is wider than int32_t, \
+    gk_randint32() otherwise */ \
+RNGT FPRFX ## rand() \
+{\
+  if (sizeof(RNGT) > sizeof(int32_t)) \
+    return (RNGT)gk_randint64(); \
+\
+  return (RNGT)gk_randint32(); \
+}\
+\
+\
+/*************************************************************************/\
+/*! Returns rand() % max, i.e. a number in [0, max); max must not be 0 */ \
+/**************************************************************************/\
+RNGT FPRFX ## randInRange(RNGT max) \
+{\
+  return (RNGT)((FPRFX ## rand())%max); \
+}\
+\
+\
+/*************************************************************************/\
+/*! Randomly permutes the elements of an array p[]. \
+    flag == 1, p[i] = i prior to permutation, \
+    flag == 0, p[] is used as passed in. nshuffles is ignored when n < 10. */\
+/**************************************************************************/\
+void FPRFX ## randArrayPermute(RNGT n, VALT *p, RNGT nshuffles, int flag)\
+{\
+  RNGT i, u, v;\
+  VALT tmp;\
+\
+  if (flag == 1) {\
+    for (i=0; i<n; i++)\
+      p[i] = (VALT)i;\
+  }\
+\
+  if (n < 10) { /* small arrays: n swaps of two randomly chosen entries */\
+    for (i=0; i<n; i++) {\
+      v = FPRFX ## randInRange(n);\
+      u = FPRFX ## randInRange(n);\
+      gk_SWAP(p[v], p[u], tmp);\
+    }\
+  }\
+  else { /* nshuffles rounds, each swapping entries of two 4-element windows */\
+    for (i=0; i<nshuffles; i++) {\
+      v = FPRFX ## randInRange(n-3); /* range n-3 leaves room for offsets +0..+3 */\
+      u = FPRFX ## randInRange(n-3);\
+      /*gk_SWAP(p[v+0], p[u+0], tmp);*/\
+      /*gk_SWAP(p[v+1], p[u+1], tmp);*/\
+      /*gk_SWAP(p[v+2], p[u+2], tmp);*/\
+      /*gk_SWAP(p[v+3], p[u+3], tmp);*/\
+      gk_SWAP(p[v+0], p[u+2], tmp);\
+      gk_SWAP(p[v+1], p[u+3], tmp);\
+      gk_SWAP(p[v+2], p[u+0], tmp);\
+      gk_SWAP(p[v+3], p[u+1], tmp);\
+    }\
+  }\
+}\
+\
+\
+/*************************************************************************/\
+/*! Permutes p[] by swapping every position with a randomly chosen one. \
+    flag == 1, p[i] = i prior to permutation, \
+    flag == 0, p[] is used as passed in. */\
+/**************************************************************************/\
+void FPRFX ## randArrayPermuteFine(RNGT n, VALT *p, int flag)\
+{\
+  RNGT pos, pick;\
+  VALT tmp;\
+\
+  if (flag == 1) {\
+    for (pos=0; pos<n; pos++)\
+      p[pos] = (VALT)pos;\
+  }\
+\
+  for (pos=0; pos<n; pos++) {\
+    pick = FPRFX ## randInRange(n);\
+    gk_SWAP(p[pos], p[pick], tmp);\
+  }\
+}\
+
+
+#define GK_MKRANDOM_PROTO(FPRFX, RNGT, VALT) /* prototypes of the GK_MKRANDOM() routines */\
+  void FPRFX ## srand(RNGT seed); \
+  RNGT FPRFX ## rand(); \
+  RNGT FPRFX ## randInRange(RNGT max); \
+  void FPRFX ## randArrayPermute(RNGT n, VALT *p, RNGT nshuffles, int flag);\
+  void FPRFX ## randArrayPermuteFine(RNGT n, VALT *p, int flag);\
+
+
+#endif
diff --git a/gk_mksort.h b/gk_mksort.h
new file mode 100644
index 0000000..48674db
--- /dev/null
+++ b/gk_mksort.h
@@ -0,0 +1,271 @@
+/*!
+\file  gk_mksort.h
+\brief Templates for the qsort routine
+
+\date   Started 3/28/07
+\author George
+\version\verbatim $Id: gk_mksort.h 21051 2017-05-25 04:36:14Z karypis $ \endverbatim
+*/
+
+
+#ifndef _GK_MKSORT_H_
+#define _GK_MKSORT_H_
+
+/* Adopted from GNU glibc by Mjt.
+ * See stdlib/qsort.c in glibc */
+
+/* Copyright (C) 1991, 1992, 1996, 1997, 1999 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Written by Douglas C. Schmidt (schmidt@ics.uci.edu).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+/* in-line qsort implementation.  Differs from traditional qsort() routine
+ * in that it is a macro, not a function, and instead of passing an address
+ * of a comparison routine to the function, it is possible to inline the
+ * comparison routine, thus speeding up sorting a lot.
+ *
+ * Usage:
+ *  #include "gk_mksort.h"
+ *  #define islt(a,b) (strcmp((*a),(*b))<0)
+ *  char *arr[];
+ *  int n;
+ *  GK_MKQSORT(char*, arr, n, islt);
+ *
+ * The "prototype" and 4 arguments are:
+ *  GK_MKQSORT(TYPE,BASE,NELT,ISLT)
+ *  1) type of each element, TYPE,
+ *  2) address of the beginning of the array, of type TYPE*,
+ *  3) number of elements in the array, and
+ *  4) comparison routine.
+ * Array pointer and number of elements are referenced only once.
+ * This is similar to a call
+ *  qsort(BASE,NELT,sizeof(TYPE),ISLT)
+ * with the difference in the last parameter.
+ * Note the islt macro/routine (it receives pointers to two elements):
+ * the only condition of interest is whether one element is less than
+ * another, no other conditions (greater than, equal to etc) are tested.
+ * So, for example, to define integer sort, use:
+ *  #define islt(a,b) ((*a)<(*b))
+ *  GK_MKQSORT(int, arr, n, islt)
+ *
+ * The macro could be used to implement a sorting function (see examples
+ * below), or to implement the sorting algorithm inline.  That is, either
+ * create a sorting function and use it whenever you want to sort something,
+ * or use the GK_MKQSORT() macro directly instead of calling such a routine.
+ * Note that the macro expands to quite some code (compiled size of int qsort
+ * on x86 is about 700..800 bytes).
+ *
+ * Using this macro directly it isn't possible to implement traditional
+ * qsort() routine, because the macro assumes sizeof(element) == sizeof(TYPE),
+ * while qsort() allows element size to be different.
+ *
+ * Several ready-to-use examples:
+ *
+ * Sorting array of integers:
+ * void int_qsort(int *arr, unsigned n) {
+ * #define int_lt(a,b) ((*a)<(*b))
+ *   GK_MKQSORT(int, arr, n, int_lt);
+ * }
+ *
+ * Sorting array of string pointers:
+ * void str_qsort(char *arr[], unsigned n) {
+ * #define str_lt(a,b) (strcmp((*a),(*b)) < 0)
+ *   GK_MKQSORT(char*, arr, n, str_lt);
+ * }
+ *
+ * Sorting array of structures:
+ *
+ * struct elt {
+ *   int key;
+ *   ...
+ * };
+ * void elt_qsort(struct elt *arr, unsigned n) {
+ * #define elt_lt(a,b) ((a)->key < (b)->key)
+ *  GK_MKQSORT(struct elt, arr, n, elt_lt);
+ * }
+ *
+ * And so on.
+ */
+
+/* Swap the two items pointed to by a and b, using t (an lvalue of the element type) as scratch; the arguments are pasted unparenthesized. */
+#define _GKQSORT_SWAP(a, b, t) ((void)((t = *a), (*a = *b), (*b = t)))
+
+/* Discontinue quicksort algorithm when partition gets below this size. */
+#define _GKQSORT_MAX_THRESH 8
+
+/* The next 4 #defines implement a very fast in-line stack abstraction; they rely on the _stack/_top locals and the _lo/_hi entry fields declared in GK_MKQSORT. */
+#define _GKQSORT_STACK_SIZE	    (8 * sizeof(size_t))
+#define _GKQSORT_PUSH(top, low, high) (((top->_lo = (low)), (top->_hi = (high)), ++top))
+#define	_GKQSORT_POP(low, high, top)  ((--top, (low = top->_lo), (high = top->_hi)))
+#define	_GKQSORT_STACK_NOT_EMPTY	    (_stack < _top)
+
+
+/* The main code starts here... */
+#define GK_MKQSORT(GKQSORT_TYPE,GKQSORT_BASE,GKQSORT_NELT,GKQSORT_LT) /* expands to a { } block that sorts BASE[0..NELT-1] in place */  \
+{									\
+  GKQSORT_TYPE *const _base = (GKQSORT_BASE);				\
+  const size_t _elems = (GKQSORT_NELT);					\
+  GKQSORT_TYPE _hold;							\
+									\
+  if (_elems < 1)                                                      \
+    return;  /* NOTE: bare return - expand only inside functions returning void */ \
+                                                                        \
+  /* Don't declare two variables of type GKQSORT_TYPE in a single	\
+   * statement: e.g. `TYPE a, b;' with a pointer TYPE			\
+   * expands to `type* a, b;', which isn't what we want.		\
+   */									\
+									\
+  if (_elems > _GKQSORT_MAX_THRESH) {					\
+    GKQSORT_TYPE *_lo = _base;						\
+    GKQSORT_TYPE *_hi = _lo + _elems - 1;				\
+    struct {								\
+      GKQSORT_TYPE *_hi; GKQSORT_TYPE *_lo;				\
+    } _stack[_GKQSORT_STACK_SIZE], *_top = _stack + 1;			\
+									\
+    while (_GKQSORT_STACK_NOT_EMPTY) {					\
+      GKQSORT_TYPE *_left_ptr; GKQSORT_TYPE *_right_ptr;		\
+									\
+      /* Select median value from among LO, MID, and HI. Rearrange	\
+         LO and HI so the three values are sorted. This lowers the	\
+         probability of picking a pathological pivot value and		\
+         skips a comparison for both the LEFT_PTR and RIGHT_PTR in	\
+         the while loops. */						\
+									\
+      GKQSORT_TYPE *_mid = _lo + ((_hi - _lo) >> 1);			\
+									\
+      if (GKQSORT_LT (_mid, _lo))					\
+        _GKQSORT_SWAP (_mid, _lo, _hold);				\
+      if (GKQSORT_LT (_hi, _mid))					\
+        _GKQSORT_SWAP (_mid, _hi, _hold);				\
+      else								\
+        goto _jump_over;						\
+      if (GKQSORT_LT (_mid, _lo))					\
+        _GKQSORT_SWAP (_mid, _lo, _hold);				\
+  _jump_over:;  /* labels have function scope: one expansion per function */	\
+									\
+      _left_ptr  = _lo + 1;						\
+      _right_ptr = _hi - 1;						\
+									\
+      /* Here's the famous ``collapse the walls'' section of quicksort.	\
+         Gotta like those tight inner loops!  They are the main reason	\
+         that this algorithm runs much faster than others. */		\
+      do {								\
+        while (GKQSORT_LT (_left_ptr, _mid))				\
+         ++_left_ptr;							\
+									\
+        while (GKQSORT_LT (_mid, _right_ptr))				\
+          --_right_ptr;							\
+									\
+        if (_left_ptr < _right_ptr) {					\
+          _GKQSORT_SWAP (_left_ptr, _right_ptr, _hold);			\
+          if (_mid == _left_ptr)					\
+            _mid = _right_ptr;						\
+          else if (_mid == _right_ptr)					\
+            _mid = _left_ptr;						\
+          ++_left_ptr;							\
+          --_right_ptr;							\
+        }								\
+        else if (_left_ptr == _right_ptr) {				\
+          ++_left_ptr;							\
+          --_right_ptr;							\
+          break;							\
+        }								\
+      } while (_left_ptr <= _right_ptr);				\
+									\
+     /* Set up pointers for next iteration.  First determine whether	\
+        left and right partitions are below the threshold size.  If so,	\
+        ignore one or both.  Otherwise, push the larger partition's	\
+        bounds on the stack and continue sorting the smaller one. */	\
+									\
+      if (_right_ptr - _lo <= _GKQSORT_MAX_THRESH) {			\
+        if (_hi - _left_ptr <= _GKQSORT_MAX_THRESH)			\
+          /* Ignore both small partitions. */				\
+          _GKQSORT_POP (_lo, _hi, _top);				\
+        else								\
+          /* Ignore small left partition. */				\
+          _lo = _left_ptr;						\
+      }									\
+      else if (_hi - _left_ptr <= _GKQSORT_MAX_THRESH)			\
+        /* Ignore small right partition. */				\
+        _hi = _right_ptr;						\
+      else if (_right_ptr - _lo > _hi - _left_ptr) {			\
+        /* Push larger left partition indices. */			\
+        _GKQSORT_PUSH (_top, _lo, _right_ptr);				\
+        _lo = _left_ptr;						\
+      }									\
+      else {								\
+        /* Push larger right partition indices. */			\
+        _GKQSORT_PUSH (_top, _left_ptr, _hi);				\
+        _hi = _right_ptr;						\
+      }									\
+    }									\
+  }									\
+									\
+  /* Once the BASE array is partially sorted by quicksort the rest	\
+     is completely sorted using insertion sort, since this is efficient	\
+     for partitions below MAX_THRESH size. BASE points to the		\
+     beginning of the array to sort, and END_PTR points at the very	\
+     last element in the array (*not* one beyond it!). */		\
+									\
+  {									\
+    GKQSORT_TYPE *const _end_ptr = _base + _elems - 1;			\
+    GKQSORT_TYPE *_tmp_ptr = _base;					\
+    register GKQSORT_TYPE *_run_ptr;					\
+    GKQSORT_TYPE *_thresh;						\
+									\
+    _thresh = _base + _GKQSORT_MAX_THRESH;				\
+    if (_thresh > _end_ptr)						\
+      _thresh = _end_ptr;						\
+									\
+    /* Find smallest element in first threshold and place it at the	\
+       array's beginning.  This is the smallest array element,		\
+       and the operation speeds up insertion sort's inner loop. */	\
+									\
+    for (_run_ptr = _tmp_ptr + 1; _run_ptr <= _thresh; ++_run_ptr)	\
+      if (GKQSORT_LT (_run_ptr, _tmp_ptr))				\
+        _tmp_ptr = _run_ptr;						\
+									\
+    if (_tmp_ptr != _base)						\
+      _GKQSORT_SWAP (_tmp_ptr, _base, _hold);				\
+									\
+    /* Insertion sort, running from left-hand-side			\
+     * up to right-hand-side.  */					\
+									\
+    _run_ptr = _base + 1;						\
+    while (++_run_ptr <= _end_ptr) {  /* pre-increment: the first element examined is _base + 2 */	\
+      _tmp_ptr = _run_ptr - 1;						\
+      while (GKQSORT_LT (_run_ptr, _tmp_ptr))				\
+        --_tmp_ptr;							\
+									\
+      ++_tmp_ptr;							\
+      if (_tmp_ptr != _run_ptr) {					\
+        GKQSORT_TYPE *_trav = _run_ptr + 1;				\
+        while (--_trav >= _run_ptr) {  /* body runs once, with _trav == _run_ptr */	\
+          GKQSORT_TYPE *_hi; GKQSORT_TYPE *_lo;				\
+          _hold = *_trav;						\
+									\
+          for (_hi = _lo = _trav; --_lo >= _tmp_ptr; _hi = _lo)		\
+            *_hi = *_lo;						\
+          *_hi = _hold;							\
+        }								\
+      }									\
+    }									\
+  }									\
+									\
+}
+
+#endif
diff --git a/gk_mkutils.h b/gk_mkutils.h
new file mode 100644
index 0000000..a092f22
--- /dev/null
+++ b/gk_mkutils.h
@@ -0,0 +1,40 @@
+/*!
+\file  
+\brief Templates for various utility routines
+
+\date   Started 5/28/07
+\author George
+\version\verbatim $Id: gk_mkutils.h 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
+*/
+
+#ifndef _GK_MKUTILS_H_
+#define _GK_MKUTILS_H_
+
+
+#define GK_MKARRAY2CSR(PRFX, TYPE)\
+/*************************************************************************/\
+/*! Template that generates the PRFX-prefixed array2csr() routine */\
+/**************************************************************************/\
+void PRFX ## array2csr(TYPE n, TYPE range, TYPE *array, TYPE *ptr, TYPE *ind)\
+{\
+  TYPE k, slot;\
+\
+  for (k=0; k<=range; k++)  /* clear the range+1 counters */\
+    ptr[k] = 0;\
+  for (k=0; k<n; k++)  /* ptr[v] counts the entries of array[] equal to v */\
+    ptr[array[k]] += 1;\
+  /* MAKECSR/SHIFTCSR are defined elsewhere; presumably counts->offsets, then a re-shift */\
+  MAKECSR(k, range, ptr);\
+  for (k=0; k<n; k++) {\
+    slot = ptr[array[k]]++;  /* read the slot for this value, then advance it */\
+    ind[slot] = k;\
+  }\
+  SHIFTCSR(k, range, ptr);\
+}
+
+
+#define GK_MKARRAY2CSR_PROTO(PRFX, TYPE) /* declares the routine generated by GK_MKARRAY2CSR(PRFX, TYPE) */\
+  void PRFX ## array2csr(TYPE n, TYPE range, TYPE *array, TYPE *ptr, TYPE *ind);\
+
+
+#endif
diff --git a/gk_ms_inttypes.h b/gk_ms_inttypes.h
new file mode 100644
index 0000000..b89fc10
--- /dev/null
+++ b/gk_ms_inttypes.h
@@ -0,0 +1,301 @@
+// ISO C9x  compliant inttypes.h for Microsoft Visual Studio
+// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 
+// 
+//  Copyright (c) 2006 Alexander Chemeris
+// 
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// 
+//   1. Redistributions of source code must retain the above copyright notice,
+//      this list of conditions and the following disclaimer.
+// 
+//   2. Redistributions in binary form must reproduce the above copyright
+//      notice, this list of conditions and the following disclaimer in the
+//      documentation and/or other materials provided with the distribution.
+// 
+//   3. The name of the author may be used to endorse or promote products
+//      derived from this software without specific prior written permission.
+// 
+// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
+// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// 
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef _MSC_VER // [
+#error "Use this header only with Microsoft Visual C++ compilers!"
+#endif // _MSC_VER ]
+
+#ifndef _MSC_INTTYPES_H_ // [
+#define _MSC_INTTYPES_H_
+
+#if _MSC_VER > 1000
+#pragma once
+#endif
+
+#include "gk_ms_stdint.h"
+
+// 7.8 Format conversion of integer types
+
+typedef struct {
+   intmax_t quot;   // quotient, as filled in by imaxdiv()
+   intmax_t rem;    // remainder, as filled in by imaxdiv()
+} imaxdiv_t;
+
+// 7.8.1 Macros for format specifiers
+
+// The fprintf macros for signed integers are (I32, I64 and I are Microsoft-specific printf size prefixes):
+#define PRId8       "d"
+#define PRIi8       "i"
+#define PRIdLEAST8  "d"
+#define PRIiLEAST8  "i"
+#define PRIdFAST8   "d"
+#define PRIiFAST8   "i"
+
+#define PRId16       "hd"
+#define PRIi16       "hi"
+#define PRIdLEAST16  "hd"
+#define PRIiLEAST16  "hi"
+#define PRIdFAST16   "hd"
+#define PRIiFAST16   "hi"
+
+#define PRId32       "I32d"
+#define PRIi32       "I32i"
+#define PRIdLEAST32  "I32d"
+#define PRIiLEAST32  "I32i"
+#define PRIdFAST32   "I32d"
+#define PRIiFAST32   "I32i"
+
+#define PRId64       "I64d"
+#define PRIi64       "I64i"
+#define PRIdLEAST64  "I64d"
+#define PRIiLEAST64  "I64i"
+#define PRIdFAST64   "I64d"
+#define PRIiFAST64   "I64i"
+
+#define PRIdMAX     "I64d"
+#define PRIiMAX     "I64i"
+
+#define PRIdPTR     "Id"
+#define PRIiPTR     "Ii"
+
+// The fprintf macros for unsigned integers are (same Microsoft I32/I64/I size prefixes as the signed set):
+#define PRIo8       "o"
+#define PRIu8       "u"
+#define PRIx8       "x"
+#define PRIX8       "X"
+#define PRIoLEAST8  "o"
+#define PRIuLEAST8  "u"
+#define PRIxLEAST8  "x"
+#define PRIXLEAST8  "X"
+#define PRIoFAST8   "o"
+#define PRIuFAST8   "u"
+#define PRIxFAST8   "x"
+#define PRIXFAST8   "X"
+
+#define PRIo16       "ho"
+#define PRIu16       "hu"
+#define PRIx16       "hx"
+#define PRIX16       "hX"
+#define PRIoLEAST16  "ho"
+#define PRIuLEAST16  "hu"
+#define PRIxLEAST16  "hx"
+#define PRIXLEAST16  "hX"
+#define PRIoFAST16   "ho"
+#define PRIuFAST16   "hu"
+#define PRIxFAST16   "hx"
+#define PRIXFAST16   "hX"
+
+#define PRIo32       "I32o"
+#define PRIu32       "I32u"
+#define PRIx32       "I32x"
+#define PRIX32       "I32X"
+#define PRIoLEAST32  "I32o"
+#define PRIuLEAST32  "I32u"
+#define PRIxLEAST32  "I32x"
+#define PRIXLEAST32  "I32X"
+#define PRIoFAST32   "I32o"
+#define PRIuFAST32   "I32u"
+#define PRIxFAST32   "I32x"
+#define PRIXFAST32   "I32X"
+
+#define PRIo64       "I64o"
+#define PRIu64       "I64u"
+#define PRIx64       "I64x"
+#define PRIX64       "I64X"
+#define PRIoLEAST64  "I64o"
+#define PRIuLEAST64  "I64u"
+#define PRIxLEAST64  "I64x"
+#define PRIXLEAST64  "I64X"
+#define PRIoFAST64   "I64o"
+#define PRIuFAST64   "I64u"
+#define PRIxFAST64   "I64x"
+#define PRIXFAST64   "I64X"
+
+#define PRIoMAX     "I64o"
+#define PRIuMAX     "I64u"
+#define PRIxMAX     "I64x"
+#define PRIXMAX     "I64X"
+
+#define PRIoPTR     "Io"
+#define PRIuPTR     "Iu"
+#define PRIxPTR     "Ix"
+#define PRIXPTR     "IX"
+
+// The fscanf macros for signed integers are (NOTE(review): the 8-bit entries carry no size prefix, so scanf stores a full int - confirm callers pass int-sized storage):
+#define SCNd8       "d"
+#define SCNi8       "i"
+#define SCNdLEAST8  "d"
+#define SCNiLEAST8  "i"
+#define SCNdFAST8   "d"
+#define SCNiFAST8   "i"
+
+#define SCNd16       "hd"
+#define SCNi16       "hi"
+#define SCNdLEAST16  "hd"
+#define SCNiLEAST16  "hi"
+#define SCNdFAST16   "hd"
+#define SCNiFAST16   "hi"
+
+#define SCNd32       "ld"
+#define SCNi32       "li"
+#define SCNdLEAST32  "ld"
+#define SCNiLEAST32  "li"
+#define SCNdFAST32   "ld"
+#define SCNiFAST32   "li"
+
+#define SCNd64       "I64d"
+#define SCNi64       "I64i"
+#define SCNdLEAST64  "I64d"
+#define SCNiLEAST64  "I64i"
+#define SCNdFAST64   "I64d"
+#define SCNiFAST64   "I64i"
+
+#define SCNdMAX     "I64d"
+#define SCNiMAX     "I64i"
+
+#ifdef _WIN64 // [
+#  define SCNdPTR     "I64d"
+#  define SCNiPTR     "I64i"
+#else  // _WIN64 ][
+#  define SCNdPTR     "ld"
+#  define SCNiPTR     "li"
+#endif  // _WIN64 ]
+
+// The fscanf macros for unsigned integers are (NOTE(review): the 8-bit entries carry no size prefix, so scanf stores a full unsigned int - confirm callers pass int-sized storage):
+#define SCNo8       "o"
+#define SCNu8       "u"
+#define SCNx8       "x"
+#define SCNX8       "X"
+#define SCNoLEAST8  "o"
+#define SCNuLEAST8  "u"
+#define SCNxLEAST8  "x"
+#define SCNXLEAST8  "X"
+#define SCNoFAST8   "o"
+#define SCNuFAST8   "u"
+#define SCNxFAST8   "x"
+#define SCNXFAST8   "X"
+
+#define SCNo16       "ho"
+#define SCNu16       "hu"
+#define SCNx16       "hx"
+#define SCNX16       "hX"
+#define SCNoLEAST16  "ho"
+#define SCNuLEAST16  "hu"
+#define SCNxLEAST16  "hx"
+#define SCNXLEAST16  "hX"
+#define SCNoFAST16   "ho"
+#define SCNuFAST16   "hu"
+#define SCNxFAST16   "hx"
+#define SCNXFAST16   "hX"
+
+#define SCNo32       "lo"
+#define SCNu32       "lu"
+#define SCNx32       "lx"
+#define SCNX32       "lX"
+#define SCNoLEAST32  "lo"
+#define SCNuLEAST32  "lu"
+#define SCNxLEAST32  "lx"
+#define SCNXLEAST32  "lX"
+#define SCNoFAST32   "lo"
+#define SCNuFAST32   "lu"
+#define SCNxFAST32   "lx"
+#define SCNXFAST32   "lX"
+
+#define SCNo64       "I64o"
+#define SCNu64       "I64u"
+#define SCNx64       "I64x"
+#define SCNX64       "I64X"
+#define SCNoLEAST64  "I64o"
+#define SCNuLEAST64  "I64u"
+#define SCNxLEAST64  "I64x"
+#define SCNXLEAST64  "I64X"
+#define SCNoFAST64   "I64o"
+#define SCNuFAST64   "I64u"
+#define SCNxFAST64   "I64x"
+#define SCNXFAST64   "I64X"
+
+#define SCNoMAX     "I64o"
+#define SCNuMAX     "I64u"
+#define SCNxMAX     "I64x"
+#define SCNXMAX     "I64X"
+
+#ifdef _WIN64 // [
+#  define SCNoPTR     "I64o"
+#  define SCNuPTR     "I64u"
+#  define SCNxPTR     "I64x"
+#  define SCNXPTR     "I64X"
+#else  // _WIN64 ][
+#  define SCNoPTR     "lo"
+#  define SCNuPTR     "lu"
+#  define SCNxPTR     "lx"
+#  define SCNXPTR     "lX"
+#endif  // _WIN64 ]
+
+// 7.8.2 Functions for greatest-width integer types
+
+// 7.8.2.1 The imaxabs function (textual alias for _abs64)
+#define imaxabs _abs64
+
+// 7.8.2.2 The imaxdiv function
+
+// This is modified version of div() function from Microsoft's div.c found
+// in %MSVC.NET%\crt\src\div.c
+#ifdef STATIC_IMAXDIV // [
+static
+#else // STATIC_IMAXDIV ][
+_inline
+#endif // STATIC_IMAXDIV ]
+imaxdiv_t __cdecl imaxdiv(intmax_t numer, intmax_t denom)
+{
+   imaxdiv_t qr;
+
+   qr.quot = numer / denom;
+   qr.rem = numer % denom;
+
+   // A negative numerator paired with a positive remainder is not the result
+   // wanted here: step the quotient up by one and the remainder down by denom.
+   if (qr.rem > 0 && numer < 0) {
+      qr.quot += 1;
+      qr.rem -= denom;
+   }
+   return qr;
+}
+
+// 7.8.2.3 The strtoimax and strtoumax functions (textual aliases for _strtoi64 / _strtoui64)
+#define strtoimax _strtoi64
+#define strtoumax _strtoui64
+
+// 7.8.2.4 The wcstoimax and wcstoumax functions (textual aliases for _wcstoi64 / _wcstoui64)
+#define wcstoimax _wcstoi64
+#define wcstoumax _wcstoui64
+
+
+#endif // _MSC_INTTYPES_H_ ]
diff --git a/gk_ms_stat.h b/gk_ms_stat.h
new file mode 100644
index 0000000..a1ef6fa
--- /dev/null
+++ b/gk_ms_stat.h
@@ -0,0 +1,22 @@
+#ifndef _MSC_VER // [
+#error "Use this header only with Microsoft Visual C++ compilers!"
+#endif // _MSC_VER ]
+
+#ifndef _MS_STAT_H_
+#define _MS_STAT_H_
+
+#if _MSC_VER > 1000
+#pragma once
+#endif
+
+#include <sys/stat.h>
+/* Test macros for file types.  */
+
+#define __S_ISTYPE(mode, mask)  (((mode) & S_IFMT) == (mask))  /* compare the file-type bits of a st_mode value */
+
+#define S_ISDIR(mode)    __S_ISTYPE((mode), S_IFDIR)
+#define S_ISCHR(mode)    __S_ISTYPE((mode), S_IFCHR)
+#define S_ISBLK(mode)    ((void)(mode), 0)  /* the MSVC <sys/stat.h> defines no S_IFBLK: never a block device */
+#define S_ISREG(mode)    __S_ISTYPE((mode), S_IFREG)
+
+#endif 
diff --git a/gk_ms_stdint.h b/gk_ms_stdint.h
new file mode 100644
index 0000000..7e200dc
--- /dev/null
+++ b/gk_ms_stdint.h
@@ -0,0 +1,222 @@
+// ISO C9x  compliant stdint.h for Microsoft Visual Studio
+// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 
+// 
+//  Copyright (c) 2006 Alexander Chemeris
+// 
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// 
+//   1. Redistributions of source code must retain the above copyright notice,
+//      this list of conditions and the following disclaimer.
+// 
+//   2. Redistributions in binary form must reproduce the above copyright
+//      notice, this list of conditions and the following disclaimer in the
+//      documentation and/or other materials provided with the distribution.
+// 
+//   3. The name of the author may be used to endorse or promote products
+//      derived from this software without specific prior written permission.
+// 
+// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
+// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// 
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef _MSC_VER // [
+#error "Use this header only with Microsoft Visual C++ compilers!"
+#endif // _MSC_VER ]
+
+#ifndef _MSC_STDINT_H_ // [
+#define _MSC_STDINT_H_
+
+#if _MSC_VER > 1000
+#pragma once
+#endif
+
+#include <limits.h>
+
+// For Visual Studio 6 in C++ mode wrap <wchar.h> include with 'extern "C++" {}'
+// or compiler give many errors like this:
+//   error C2733: second C linkage of overloaded function 'wmemchr' not allowed
+#if (_MSC_VER < 1300) && defined(__cplusplus)
+   extern "C++" {
+#endif 
+#     include <wchar.h>
+#if (_MSC_VER < 1300) && defined(__cplusplus)
+   }
+#endif
+
+// 7.18.1 Integer types
+
+// 7.18.1.1 Exact-width integer types ('signed' is spelled out so the signed types cannot follow the default signedness of char)
+typedef signed __int8     int8_t;
+typedef signed __int16    int16_t;
+typedef signed __int32    int32_t;
+typedef signed __int64    int64_t;
+typedef unsigned __int8   uint8_t;
+typedef unsigned __int16  uint16_t;
+typedef unsigned __int32  uint32_t;
+typedef unsigned __int64  uint64_t;
+
+// 7.18.1.2 Minimum-width integer types (plain aliases of the exact-width types)
+typedef int8_t    int_least8_t;
+typedef int16_t   int_least16_t;
+typedef int32_t   int_least32_t;
+typedef int64_t   int_least64_t;
+typedef uint8_t   uint_least8_t;
+typedef uint16_t  uint_least16_t;
+typedef uint32_t  uint_least32_t;
+typedef uint64_t  uint_least64_t;
+
+// 7.18.1.3 Fastest minimum-width integer types (also plain aliases of the exact-width types)
+typedef int8_t    int_fast8_t;
+typedef int16_t   int_fast16_t;
+typedef int32_t   int_fast32_t;
+typedef int64_t   int_fast64_t;
+typedef uint8_t   uint_fast8_t;
+typedef uint16_t  uint_fast16_t;
+typedef uint32_t  uint_fast32_t;
+typedef uint64_t  uint_fast64_t;
+
+// 7.18.1.4 Integer types capable of holding object pointers (__int64 under _WIN64, int otherwise)
+#ifdef _WIN64 // [
+   typedef __int64           intptr_t;
+   typedef unsigned __int64  uintptr_t;
+#else // _WIN64 ][
+   typedef int               intptr_t;
+   typedef unsigned int      uintptr_t;
+#endif // _WIN64 ]
+
+// 7.18.1.5 Greatest-width integer types (the 64-bit types)
+typedef int64_t   intmax_t;
+typedef uint64_t  uintmax_t;
+
+
+// 7.18.2 Limits of specified-width integer types
+
+#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [   See footnote 220 at page 257 and footnote 221 at page 259
+
+// 7.18.2.1 Limits of exact-width integer types (built on the _I8_MIN-style constants, presumably supplied by <limits.h>)
+#define INT8_MIN     ((int8_t)_I8_MIN)
+#define INT8_MAX     _I8_MAX
+#define INT16_MIN    ((int16_t)_I16_MIN)
+#define INT16_MAX    _I16_MAX
+#define INT32_MIN    ((int32_t)_I32_MIN)
+#define INT32_MAX    _I32_MAX
+#define INT64_MIN    ((int64_t)_I64_MIN)
+#define INT64_MAX    _I64_MAX
+#define UINT8_MAX    _UI8_MAX
+#define UINT16_MAX   _UI16_MAX
+#define UINT32_MAX   _UI32_MAX
+#define UINT64_MAX   _UI64_MAX
+
+// 7.18.2.2 Limits of minimum-width integer types (same values as the exact-width limits)
+#define INT_LEAST8_MIN    INT8_MIN
+#define INT_LEAST8_MAX    INT8_MAX
+#define INT_LEAST16_MIN   INT16_MIN
+#define INT_LEAST16_MAX   INT16_MAX
+#define INT_LEAST32_MIN   INT32_MIN
+#define INT_LEAST32_MAX   INT32_MAX
+#define INT_LEAST64_MIN   INT64_MIN
+#define INT_LEAST64_MAX   INT64_MAX
+#define UINT_LEAST8_MAX   UINT8_MAX
+#define UINT_LEAST16_MAX  UINT16_MAX
+#define UINT_LEAST32_MAX  UINT32_MAX
+#define UINT_LEAST64_MAX  UINT64_MAX
+
+// 7.18.2.3 Limits of fastest minimum-width integer types (same values as the exact-width limits)
+#define INT_FAST8_MIN    INT8_MIN
+#define INT_FAST8_MAX    INT8_MAX
+#define INT_FAST16_MIN   INT16_MIN
+#define INT_FAST16_MAX   INT16_MAX
+#define INT_FAST32_MIN   INT32_MIN
+#define INT_FAST32_MAX   INT32_MAX
+#define INT_FAST64_MIN   INT64_MIN
+#define INT_FAST64_MAX   INT64_MAX
+#define UINT_FAST8_MAX   UINT8_MAX
+#define UINT_FAST16_MAX  UINT16_MAX
+#define UINT_FAST32_MAX  UINT32_MAX
+#define UINT_FAST64_MAX  UINT64_MAX
+
+// 7.18.2.4 Limits of integer types capable of holding object pointers
+#ifdef _WIN64 // [
+#  define INTPTR_MIN   INT64_MIN
+#  define INTPTR_MAX   INT64_MAX
+#  define UINTPTR_MAX  UINT64_MAX
+#else // _WIN64 ][
+#  define INTPTR_MIN   INT32_MIN
+#  define INTPTR_MAX   INT32_MAX
+#  define UINTPTR_MAX  UINT32_MAX
+#endif // _WIN64 ]
+
+// 7.18.2.5 Limits of greatest-width integer types
+#define INTMAX_MIN   INT64_MIN
+#define INTMAX_MAX   INT64_MAX
+#define UINTMAX_MAX  UINT64_MAX
+
+// 7.18.3 Limits of other integer types
+
+#ifdef _WIN64 // [
+#  define PTRDIFF_MIN  _I64_MIN
+#  define PTRDIFF_MAX  _I64_MAX
+#else  // _WIN64 ][
+#  define PTRDIFF_MIN  _I32_MIN
+#  define PTRDIFF_MAX  _I32_MAX
+#endif  // _WIN64 ]
+
+#define SIG_ATOMIC_MIN  INT_MIN
+#define SIG_ATOMIC_MAX  INT_MAX
+
+#ifndef SIZE_MAX // [
+#  ifdef _WIN64 // [
+#     define SIZE_MAX  _UI64_MAX
+#  else // _WIN64 ][
+#     define SIZE_MAX  _UI32_MAX
+#  endif // _WIN64 ]
+#endif // SIZE_MAX ]
+
+// WCHAR_MIN and WCHAR_MAX are also defined in <wchar.h>
+#ifndef WCHAR_MIN // [
+#  define WCHAR_MIN  0
+#endif  // WCHAR_MIN ]
+#ifndef WCHAR_MAX // [
+#  define WCHAR_MAX  _UI16_MAX
+#endif  // WCHAR_MAX ]
+
+#define WINT_MIN  0
+#define WINT_MAX  _UI16_MAX
+
+#endif // __STDC_LIMIT_MACROS ]
+
+
+// 7.18.4 Macros for integer constants
+
+#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [   See footnote 224 at page 260
+
+// 7.18.4.1 Macros for minimum-width integer constants (each pastes a Microsoft i8/ui8-style literal suffix onto val)
+
+#define INT8_C(val)  val##i8
+#define INT16_C(val) val##i16
+#define INT32_C(val) val##i32
+#define INT64_C(val) val##i64
+
+#define UINT8_C(val)  val##ui8
+#define UINT16_C(val) val##ui16
+#define UINT32_C(val) val##ui32
+#define UINT64_C(val) val##ui64
+
+// 7.18.4.2 Macros for greatest-width integer constants (aliases of the 64-bit macros)
+#define INTMAX_C   INT64_C
+#define UINTMAX_C  UINT64_C
+
+#endif // __STDC_CONSTANT_MACROS ]
+
+
+#endif // _MSC_STDINT_H_ ]
diff --git a/gk_proto.h b/gk_proto.h
new file mode 100644
index 0000000..6fd6bd4
--- /dev/null
+++ b/gk_proto.h
@@ -0,0 +1,426 @@
+/*!
+\file gk_proto.h
+\brief This file contains function prototypes
+
+\date   Started 3/27/2007
+\author George
+\version\verbatim $Id: gk_proto.h 22010 2018-05-14 20:20:26Z karypis $ \endverbatim
+*/
+
+#ifndef _GK_PROTO_H_
+#define _GK_PROTO_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*-------------------------------------------------------------
+ * blas.c 
+ *-------------------------------------------------------------*/
+GK_MKBLAS_PROTO(gk_c,   char,     int)
+GK_MKBLAS_PROTO(gk_i,   int,      int)
+GK_MKBLAS_PROTO(gk_i8,  int8_t,   int8_t)
+GK_MKBLAS_PROTO(gk_i16, int16_t,  int16_t)
+GK_MKBLAS_PROTO(gk_i32, int32_t,  int32_t)
+GK_MKBLAS_PROTO(gk_i64, int64_t,  int64_t)
+GK_MKBLAS_PROTO(gk_z,   ssize_t,  ssize_t)
+GK_MKBLAS_PROTO(gk_zu,  size_t,   size_t)
+GK_MKBLAS_PROTO(gk_f,   float,    float)
+GK_MKBLAS_PROTO(gk_d,   double,   double)
+GK_MKBLAS_PROTO(gk_idx, gk_idx_t, gk_idx_t)
+
+
+
+
+/*-------------------------------------------------------------
+ * io.c
+ *-------------------------------------------------------------*/
+FILE *gk_fopen(char *, char *, const char *);
+void gk_fclose(FILE *);
+ssize_t gk_read(int fd, void *vbuf, size_t count);
+ssize_t gk_write(int fd, void *vbuf, size_t count);
+ssize_t gk_getline(char **lineptr, size_t *n, FILE *stream);
+char **gk_readfile(char *fname, size_t *r_nlines);
+int32_t *gk_i32readfile(char *fname, size_t *r_nlines);
+int64_t *gk_i64readfile(char *fname, size_t *r_nlines);
+ssize_t *gk_zreadfile(char *fname, size_t *r_nlines);
+char *gk_creadfilebin(char *fname, size_t *r_nelmnts);
+size_t gk_cwritefilebin(char *fname, size_t n, char *a);
+int32_t *gk_i32readfilebin(char *fname, size_t *r_nelmnts);
+size_t gk_i32writefilebin(char *fname, size_t n, int32_t *a);
+int64_t *gk_i64readfilebin(char *fname, size_t *r_nelmnts);
+size_t gk_i64writefilebin(char *fname, size_t n, int64_t *a);
+ssize_t *gk_zreadfilebin(char *fname, size_t *r_nelmnts);
+size_t gk_zwritefilebin(char *fname, size_t n, ssize_t *a);
+float *gk_freadfilebin(char *fname, size_t *r_nelmnts);
+size_t gk_fwritefilebin(char *fname, size_t n, float *a);
+double *gk_dreadfilebin(char *fname, size_t *r_nelmnts);
+size_t gk_dwritefilebin(char *fname, size_t n, double *a);
+
+
+
+
+/*-------------------------------------------------------------
+ * fs.c
+ *-------------------------------------------------------------*/
+int gk_fexists(char *);
+int gk_dexists(char *);
+ssize_t gk_getfsize(char *);
+void gk_getfilestats(char *fname, size_t *r_nlines, size_t *r_ntokens, 
+          size_t *r_max_nlntokens, size_t *r_nbytes);
+char *gk_getbasename(char *path);
+char *gk_getextname(char *path);
+char *gk_getfilename(char *path);
+char *gk_getpathname(char *path);
+int gk_mkpath(char *);
+int gk_rmpath(char *);
+
+
+
+/*-------------------------------------------------------------
+ * memory.c
+ *-------------------------------------------------------------*/
+GK_MKALLOC_PROTO(gk_c,    char)
+GK_MKALLOC_PROTO(gk_i,    int)
+GK_MKALLOC_PROTO(gk_i8,   int8_t)
+GK_MKALLOC_PROTO(gk_i16,  int16_t)
+GK_MKALLOC_PROTO(gk_i32,  int32_t)
+GK_MKALLOC_PROTO(gk_i64,  int64_t)
+GK_MKALLOC_PROTO(gk_ui8,  uint8_t)
+GK_MKALLOC_PROTO(gk_ui16, uint16_t)
+GK_MKALLOC_PROTO(gk_ui32, uint32_t)
+GK_MKALLOC_PROTO(gk_ui64, uint64_t)
+GK_MKALLOC_PROTO(gk_z,    ssize_t)
+GK_MKALLOC_PROTO(gk_zu,   size_t)
+GK_MKALLOC_PROTO(gk_f,    float)
+GK_MKALLOC_PROTO(gk_d,    double)
+GK_MKALLOC_PROTO(gk_idx,  gk_idx_t)
+
+GK_MKALLOC_PROTO(gk_ckv,   gk_ckv_t)
+GK_MKALLOC_PROTO(gk_ikv,   gk_ikv_t)
+GK_MKALLOC_PROTO(gk_i8kv,  gk_i8kv_t)
+GK_MKALLOC_PROTO(gk_i16kv, gk_i16kv_t)
+GK_MKALLOC_PROTO(gk_i32kv, gk_i32kv_t)
+GK_MKALLOC_PROTO(gk_i64kv, gk_i64kv_t)
+GK_MKALLOC_PROTO(gk_zkv,   gk_zkv_t)
+GK_MKALLOC_PROTO(gk_zukv,  gk_zukv_t)
+GK_MKALLOC_PROTO(gk_fkv,   gk_fkv_t)
+GK_MKALLOC_PROTO(gk_dkv,   gk_dkv_t)
+GK_MKALLOC_PROTO(gk_skv,   gk_skv_t)
+GK_MKALLOC_PROTO(gk_idxkv, gk_idxkv_t)
+
+void   gk_AllocMatrix(void ***, size_t, size_t , size_t);
+void   gk_FreeMatrix(void ***, size_t, size_t);
+int    gk_malloc_init(void);   /* zero-argument functions are declared (void) so the compiler checks calls */
+void   gk_malloc_cleanup(int showstats);
+void  *gk_malloc(size_t nbytes, char *msg);
+void  *gk_realloc(void *oldptr, size_t nbytes, char *msg);
+void   gk_free(void **ptr1,...);
+size_t gk_GetCurMemoryUsed(void);
+size_t gk_GetMaxMemoryUsed(void);
+void   gk_GetVMInfo(size_t *vmsize, size_t *vmrss);
+size_t gk_GetProcVmPeak(void);
+
+
+
+/*-------------------------------------------------------------
+ * seq.c
+ *-------------------------------------------------------------*/
+gk_seq_t *gk_seq_ReadGKMODPSSM(char *file_name);
+gk_i2cc2i_t *gk_i2cc2i_create_common(char *alphabet);
+void gk_seq_init(gk_seq_t *seq);
+
+
+
+/*-------------------------------------------------------------
+ * error.c  (functions taking no arguments are declared with (void))
+ *-------------------------------------------------------------*/
+void gk_set_exit_on_error(int value);
+void errexit(char *,...);
+void gk_errexit(int signum, char *,...);
+int gk_sigtrap(void);
+int gk_siguntrap(void);
+void gk_sigthrow(int signum);
+void gk_SetSignalHandlers(void);
+void gk_UnsetSignalHandlers(void);
+void gk_NonLocalExit_Handler(int signum);
+char *gk_strerror(int errnum);
+void PrintBackTrace(void);
+
+
+/*-------------------------------------------------------------
+ * util.c
+ *-------------------------------------------------------------*/
+void  gk_RandomPermute(size_t, int *, int);
+void  gk_array2csr(size_t n, size_t range, int *array, int *ptr, int *ind);
+int   gk_log2(int);
+int   gk_ispow2(int);
+float gk_flog2(float);
+
+
+/*-------------------------------------------------------------
+ * time.c
+ *-------------------------------------------------------------*/
+gk_wclock_t gk_WClockSeconds(void);
+double gk_CPUSeconds(void);
+
+/*-------------------------------------------------------------
+ * string.c
+ *-------------------------------------------------------------*/
+char   *gk_strchr_replace(char *str, char *fromlist, char *tolist);
+int     gk_strstr_replace(char *str, char *pattern, char *replacement, char *options, char **new_str);
+char   *gk_strtprune(char *, char *);
+char   *gk_strhprune(char *, char *);
+char   *gk_strtoupper(char *); 
+char   *gk_strtolower(char *); 
+char   *gk_strdup(char *orgstr);
+int     gk_strcasecmp(char *s1, char *s2);
+int     gk_strrcmp(char *s1, char *s2);
+char   *gk_time2str(time_t time);
+time_t  gk_str2time(char *str);
+int     gk_GetStringID(gk_StringMap_t *strmap, char *key);
+
+
+
+/*-------------------------------------------------------------
+ * sort.c 
+ *-------------------------------------------------------------*/
+void gk_csorti(size_t, char *);
+void gk_csortd(size_t, char *);
+void gk_isorti(size_t, int *);
+void gk_isortd(size_t, int *);
+void gk_i32sorti(size_t, int32_t *);
+void gk_i32sortd(size_t, int32_t *);
+void gk_i64sorti(size_t, int64_t *);
+void gk_i64sortd(size_t, int64_t *);
+void gk_ui32sorti(size_t, uint32_t *);
+void gk_ui32sortd(size_t, uint32_t *);
+void gk_ui64sorti(size_t, uint64_t *);
+void gk_ui64sortd(size_t, uint64_t *);
+void gk_fsorti(size_t, float *);
+void gk_fsortd(size_t, float *);
+void gk_dsorti(size_t, double *);
+void gk_dsortd(size_t, double *);
+void gk_idxsorti(size_t, gk_idx_t *);
+void gk_idxsortd(size_t, gk_idx_t *);
+void gk_ckvsorti(size_t, gk_ckv_t *);
+void gk_ckvsortd(size_t, gk_ckv_t *);
+void gk_ikvsorti(size_t, gk_ikv_t *);
+void gk_ikvsortd(size_t, gk_ikv_t *);
+void gk_i32kvsorti(size_t, gk_i32kv_t *);
+void gk_i32kvsortd(size_t, gk_i32kv_t *);
+void gk_i64kvsorti(size_t, gk_i64kv_t *);
+void gk_i64kvsortd(size_t, gk_i64kv_t *);
+void gk_zkvsorti(size_t, gk_zkv_t *);
+void gk_zkvsortd(size_t, gk_zkv_t *);
+void gk_zukvsorti(size_t, gk_zukv_t *);
+void gk_zukvsortd(size_t, gk_zukv_t *);
+void gk_fkvsorti(size_t, gk_fkv_t *);
+void gk_fkvsortd(size_t, gk_fkv_t *);
+void gk_dkvsorti(size_t, gk_dkv_t *);
+void gk_dkvsortd(size_t, gk_dkv_t *);
+void gk_skvsorti(size_t, gk_skv_t *);
+void gk_skvsortd(size_t, gk_skv_t *);
+void gk_idxkvsorti(size_t, gk_idxkv_t *);
+void gk_idxkvsortd(size_t, gk_idxkv_t *);
+
+
+/*-------------------------------------------------------------
+ * Selection routines
+ *-------------------------------------------------------------*/
+int  gk_dfkvkselect(size_t, int, gk_fkv_t *);
+int  gk_ifkvkselect(size_t, int, gk_fkv_t *);
+
+
+/*-------------------------------------------------------------
+ * Priority queue 
+ *-------------------------------------------------------------*/
+GK_MKPQUEUE_PROTO(gk_ipq,   gk_ipq_t,   int,      gk_idx_t)
+GK_MKPQUEUE_PROTO(gk_i32pq, gk_i32pq_t, int32_t,  gk_idx_t)
+GK_MKPQUEUE_PROTO(gk_i64pq, gk_i64pq_t, int64_t,  gk_idx_t)
+GK_MKPQUEUE_PROTO(gk_fpq,   gk_fpq_t,   float,    gk_idx_t)
+GK_MKPQUEUE_PROTO(gk_dpq,   gk_dpq_t,   double,   gk_idx_t)
+GK_MKPQUEUE_PROTO(gk_idxpq, gk_idxpq_t, gk_idx_t, gk_idx_t)
+
+
+/*-------------------------------------------------------------
+ * HTable routines
+ *-------------------------------------------------------------*/
+gk_HTable_t *HTable_Create(int nelements);
+void         HTable_Reset(gk_HTable_t *htable);
+void         HTable_Resize(gk_HTable_t *htable, int nelements);
+void         HTable_Insert(gk_HTable_t *htable, int key, int val);
+void         HTable_Delete(gk_HTable_t *htable, int key);
+int          HTable_Search(gk_HTable_t *htable, int key);
+int          HTable_GetNext(gk_HTable_t *htable, int key, int *val, int type);
+int          HTable_SearchAndDelete(gk_HTable_t *htable, int key);
+void         HTable_Destroy(gk_HTable_t *htable);
+int          HTable_HFunction(int nelements, int key);
+ 
+
+/*-------------------------------------------------------------
+ * Tokenizer routines
+ *-------------------------------------------------------------*/
+void gk_strtokenize(char *line, char *delim, gk_Tokens_t *tokens);
+void gk_freetokenslist(gk_Tokens_t *tokens);
+
+/*-------------------------------------------------------------
+ * Encoder/Decoder
+ *-------------------------------------------------------------*/
+void encodeblock(unsigned char *in, unsigned char *out);
+void decodeblock(unsigned char *in, unsigned char *out);
+void GKEncodeBase64(int nbytes, unsigned char *inbuffer, unsigned char *outbuffer);
+void GKDecodeBase64(int nbytes, unsigned char *inbuffer, unsigned char *outbuffer);
+
+
+/*-------------------------------------------------------------
+ * random.c
+ *-------------------------------------------------------------*/
+GK_MKRANDOM_PROTO(gk_c,   size_t, char)
+GK_MKRANDOM_PROTO(gk_i,   size_t, int)
+GK_MKRANDOM_PROTO(gk_i32, size_t, int32_t)
+GK_MKRANDOM_PROTO(gk_f,   size_t, float)
+GK_MKRANDOM_PROTO(gk_d,   size_t, double)
+GK_MKRANDOM_PROTO(gk_idx, size_t, gk_idx_t)
+GK_MKRANDOM_PROTO(gk_z,   size_t, ssize_t)
+GK_MKRANDOM_PROTO(gk_zu,  size_t, size_t)
+void gk_randinit(uint64_t);
+uint64_t gk_randint64(void);
+uint32_t gk_randint32(void);
+
+
+/*-------------------------------------------------------------
+ * OpenMP fake functions
+ *-------------------------------------------------------------*/
+#if !defined(__OPENMP__)
+void omp_set_num_threads(int num_threads);
+int omp_get_num_threads(void);
+int omp_get_max_threads(void);
+int omp_get_thread_num(void);
+int omp_get_num_procs(void);
+int omp_in_parallel(void);
+void omp_set_dynamic(int num_threads);
+int omp_get_dynamic(void);
+void omp_set_nested(int nested);
+int omp_get_nested(void);
+#endif /* __OPENMP__ */
+
+
+/*-------------------------------------------------------------
+ * CSR-related functions
+ *-------------------------------------------------------------*/
+gk_csr_t *gk_csr_Create(void);  /* (void): a real prototype, takes no arguments */
+void gk_csr_Init(gk_csr_t *mat);
+void gk_csr_Free(gk_csr_t **mat);
+void gk_csr_FreeContents(gk_csr_t *mat);
+gk_csr_t *gk_csr_Dup(gk_csr_t *mat);
+gk_csr_t *gk_csr_ExtractSubmatrix(gk_csr_t *mat, int rstart, int nrows);
+gk_csr_t *gk_csr_ExtractRows(gk_csr_t *mat, int nrows, int *rind);
+gk_csr_t *gk_csr_ExtractPartition(gk_csr_t *mat, int *part, int pid);
+gk_csr_t **gk_csr_Split(gk_csr_t *mat, int *color);
+int gk_csr_DetermineFormat(char *filename, int format);
+gk_csr_t *gk_csr_Read(char *filename, int format, int readvals, int numbering);
+void gk_csr_Write(gk_csr_t *mat, char *filename, int format, int writevals, int numbering);
+gk_csr_t *gk_csr_Prune(gk_csr_t *mat, int what, int minf, int maxf);
+gk_csr_t *gk_csr_LowFilter(gk_csr_t *mat, int what, int norm, float fraction);
+gk_csr_t *gk_csr_TopKPlusFilter(gk_csr_t *mat, int what, int topk, float keepval);
+gk_csr_t *gk_csr_ZScoreFilter(gk_csr_t *mat, int what, float zscore);
+void gk_csr_CompactColumns(gk_csr_t *mat);
+void gk_csr_SortIndices(gk_csr_t *mat, int what);
+void gk_csr_CreateIndex(gk_csr_t *mat, int what);
+void gk_csr_Normalize(gk_csr_t *mat, int what, int norm);
+void gk_csr_Scale(gk_csr_t *mat, int type);
+void gk_csr_ComputeSums(gk_csr_t *mat, int what);
+void gk_csr_ComputeNorms(gk_csr_t *mat, int what);
+void gk_csr_ComputeSquaredNorms(gk_csr_t *mat, int what);
+gk_csr_t *gk_csr_Shuffle(gk_csr_t *mat, int what, int symmetric);  /* parameter name was misspelled "summetric" */
+gk_csr_t *gk_csr_Transpose(gk_csr_t *mat);
+float gk_csr_ComputeSimilarity(gk_csr_t *mat, int i1, int i2, int what, int simtype);
+float gk_csr_ComputePairSimilarity(gk_csr_t *mat_a, gk_csr_t *mat_b, int i1, int i2, int what, int simtype);
+int gk_csr_GetSimilarRows(gk_csr_t *mat, int nqterms, int *qind, float *qval,
+        int simtype, int nsim, float minsim, gk_fkv_t *hits, int *_imarker,
+        gk_fkv_t *i_cand);
+int gk_csr_FindConnectedComponents(gk_csr_t *mat, int32_t *cptr, int32_t *cind,
+        int32_t *cids);
+gk_csr_t *gk_csr_MakeSymmetric(gk_csr_t *mat, int op);
+gk_csr_t *gk_csr_ReorderSymmetric(gk_csr_t *mat, int32_t *perm, int32_t *iperm);
+void gk_csr_ComputeBFSOrderingSymmetric(gk_csr_t *mat, int maxdegree, int v, 
+          int32_t **r_perm, int32_t **r_iperm);
+void gk_csr_ComputeBestFOrderingSymmetric(gk_csr_t *mat, int v, int type,
+          int32_t **r_perm, int32_t **r_iperm);
+
+
+/* itemsets.c */
+void gk_find_frequent_itemsets(int ntrans, ssize_t *tranptr, int *tranind,
+        int minfreq, int maxfreq, int minlen, int maxlen,
+        void (*process_itemset)(void *stateptr, int nitems, int *itemind,
+                                int ntrans, int *tranind),
+        void *stateptr);
+
+
+/* evaluate.c */
+float ComputeAccuracy(int n, gk_fkv_t *list);
+float ComputeROCn(int n, int maxN, gk_fkv_t *list);
+float ComputeMedianRFP(int n, gk_fkv_t *list);
+float ComputeMean (int n, float *values);
+float ComputeStdDev(int  n, float *values);
+
+
+/* mcore.c */
+gk_mcore_t *gk_mcoreCreate(size_t coresize);
+gk_mcore_t *gk_gkmcoreCreate(void);  /* (void): a real prototype, takes no arguments */
+void gk_mcoreDestroy(gk_mcore_t **r_mcore, int showstats);
+void gk_gkmcoreDestroy(gk_mcore_t **r_mcore, int showstats);
+void *gk_mcoreMalloc(gk_mcore_t *mcore, size_t nbytes);
+void gk_mcorePush(gk_mcore_t *mcore);
+void gk_gkmcorePush(gk_mcore_t *mcore);
+void gk_mcorePop(gk_mcore_t *mcore);
+void gk_gkmcorePop(gk_mcore_t *mcore);
+void gk_mcoreAdd(gk_mcore_t *mcore, int type, size_t nbytes, void *ptr);
+void gk_gkmcoreAdd(gk_mcore_t *mcore, int type, size_t nbytes, void *ptr);
+void gk_mcoreDel(gk_mcore_t *mcore, void *ptr);
+void gk_gkmcoreDel(gk_mcore_t *mcore, void *ptr);
+
+/* rw.c */
+int gk_rw_PageRank(gk_csr_t *mat, float lamda, float eps, int max_niter, float *pr);
+
+
+/* graph.c */
+gk_graph_t *gk_graph_Create(void);  /* (void): a real prototype, takes no arguments */
+void gk_graph_Init(gk_graph_t *graph);
+void gk_graph_Free(gk_graph_t **graph);
+void gk_graph_FreeContents(gk_graph_t *graph);
+gk_graph_t *gk_graph_Read(char *filename, int format, int hasvals, 
+                 int numbering, int isfewgts, int isfvwgts, int isfvsizes);
+void gk_graph_Write(gk_graph_t *graph, char *filename, int format, int numbering);
+gk_graph_t *gk_graph_Dup(gk_graph_t *graph);
+gk_graph_t *gk_graph_Transpose(gk_graph_t *graph);
+gk_graph_t *gk_graph_ExtractSubgraph(gk_graph_t *graph, int vstart, int nvtxs);
+gk_graph_t *gk_graph_Reorder(gk_graph_t *graph, int32_t *perm, int32_t *iperm);
+int gk_graph_FindComponents(gk_graph_t *graph, int32_t *cptr, int32_t *cind);
+void gk_graph_ComputeBFSOrdering(gk_graph_t *graph, int v, int32_t **r_perm, 
+         int32_t **r_iperm);
+void gk_graph_ComputeBestFOrdering0(gk_graph_t *graph, int v, int type,
+              int32_t **r_perm, int32_t **r_iperm);
+void gk_graph_ComputeBestFOrdering(gk_graph_t *graph, int v, int type,
+              int32_t **r_perm, int32_t **r_iperm);
+void gk_graph_SingleSourceShortestPaths(gk_graph_t *graph, int v, void **r_sps);
+void gk_graph_SortAdjacencies(gk_graph_t *graph);
+gk_graph_t *gk_graph_MakeSymmetric(gk_graph_t *graph, int op);
+
+
+/* cache.c */
+gk_cache_t *gk_cacheCreate(uint32_t nway, uint32_t lnbits, size_t cnbits);
+void gk_cacheReset(gk_cache_t *cache);
+void gk_cacheDestroy(gk_cache_t **r_cache);
+int gk_cacheLoad(gk_cache_t *cache, size_t addr);
+double gk_cacheGetHitRate(gk_cache_t *cache);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif
+
diff --git a/gk_struct.h b/gk_struct.h
new file mode 100644
index 0000000..2925e98
--- /dev/null
+++ b/gk_struct.h
@@ -0,0 +1,296 @@
+/*!
+\file gk_struct.h
+\brief This file contains various datastructures used/provided by GKlib
+
+\date   Started 3/27/2007
+\author George
+\version\verbatim $Id: gk_struct.h 21988 2018-04-16 00:11:19Z karypis $ \endverbatim
+*/
+
+#ifndef _GK_STRUCT_H_
+#define _GK_STRUCT_H_
+
+
+/********************************************************************/
+/*! Generator for the gk_??kv_t key/value pair structures: NAME becomes a struct {KEYTYPE key; VALTYPE val;} */
+/********************************************************************/
+#define GK_MKKEYVALUE_T(NAME, KEYTYPE, VALTYPE) \
+typedef struct {\
+  KEYTYPE key;\
+  VALTYPE val;\
+} NAME;\
+
+/* The actual key/value types; every val is an ssize_t except gk_idxkv_t, whose val is a gk_idx_t */
+GK_MKKEYVALUE_T(gk_ckv_t,   char,     ssize_t)
+GK_MKKEYVALUE_T(gk_ikv_t,   int,      ssize_t)
+GK_MKKEYVALUE_T(gk_i8kv_t,  int8_t,   ssize_t)
+GK_MKKEYVALUE_T(gk_i16kv_t, int16_t,  ssize_t)
+GK_MKKEYVALUE_T(gk_i32kv_t, int32_t,  ssize_t)
+GK_MKKEYVALUE_T(gk_i64kv_t, int64_t,  ssize_t)
+GK_MKKEYVALUE_T(gk_zkv_t,   ssize_t,  ssize_t)
+GK_MKKEYVALUE_T(gk_zukv_t,  size_t,   ssize_t)
+GK_MKKEYVALUE_T(gk_fkv_t,   float,    ssize_t)
+GK_MKKEYVALUE_T(gk_dkv_t,   double,   ssize_t)
+GK_MKKEYVALUE_T(gk_skv_t,   char *,   ssize_t)
+GK_MKKEYVALUE_T(gk_idxkv_t, gk_idx_t, gk_idx_t)
+
+
+
+/********************************************************************/
+/*! Generator for the gk_?pq_t priority-queue structures: a heap array of KVTYPE entries plus an ssize_t locator array */
+/********************************************************************/
+#define GK_MKPQUEUE_T(NAME, KVTYPE)\
+typedef struct {\
+  size_t nnodes;\
+  size_t maxnodes;\
+\
+  /* Heap version of the data structure */ \
+  KVTYPE   *heap;\
+  ssize_t *locator;\
+} NAME;\
+
+GK_MKPQUEUE_T(gk_ipq_t,    gk_ikv_t)
+GK_MKPQUEUE_T(gk_i32pq_t,  gk_i32kv_t)
+GK_MKPQUEUE_T(gk_i64pq_t,  gk_i64kv_t)
+GK_MKPQUEUE_T(gk_fpq_t,    gk_fkv_t)
+GK_MKPQUEUE_T(gk_dpq_t,    gk_dkv_t)
+GK_MKPQUEUE_T(gk_idxpq_t,  gk_idxkv_t)
+
+
+#define GK_MKPQUEUE2_T(NAME, KTYPE, VTYPE)\
+typedef struct {\
+  ssize_t nnodes;\
+  ssize_t maxnodes;\
+\
+  /* Heap kept as separate keys and vals arrays; the counters are signed (ssize_t), unlike GK_MKPQUEUE_T */ \
+  KTYPE *keys;\
+  VTYPE *vals;\
+} NAME;\
+
+
+
+/*-------------------------------------------------------------
+ * Stores a sparse matrix in CSR format; each member exists in a row (r*) and a column (c*) flavor.
+ *-------------------------------------------------------------*/
+typedef struct gk_csr_t {
+  int32_t nrows, ncols;         /* NOTE(review): presumably the matrix dimensions -- confirm */
+  ssize_t *rowptr, *colptr;     /* ssize_t arrays; presumably offsets into rowind/colind -- confirm in csr.c */
+  int32_t *rowind, *colind;     /* presumably column ids per row / row ids per column -- confirm */
+  int32_t *rowids, *colids;
+  int32_t *rlabels, *clabels;
+  int32_t *rmap, *cmap;
+  float *rowval, *colval;       /* presumably the values matching rowind/colind -- confirm */
+  float *rnorms, *cnorms;       /* presumably filled by gk_csr_ComputeNorms() -- confirm */
+  float *rsums, *csums;         /* presumably filled by gk_csr_ComputeSums() -- confirm */
+  float *rsizes, *csizes;
+  float *rvols, *cvols;
+  float *rwgts, *cwgts;
+} gk_csr_t;
+
+
+/*-------------------------------------------------------------
+ * Stores a sparse graph as an xadj/adjncy adjacency list, with integer (i*) and floating point (f*) weights and sizes
+ *-------------------------------------------------------------*/
+typedef struct gk_graph_t {
+  int32_t nvtxs;                /*!< The number of vertices in the graph */
+  ssize_t *xadj;                /*!< The ptr-structure of the adjncy list */
+  int32_t *adjncy;              /*!< The adjacency list of the graph */
+  int32_t *iadjwgt;             /*!< The integer edge weights */
+  float *fadjwgt;               /*!< The floating point edge weights */
+  int32_t *ivwgts;              /*!< The integer vertex weights */
+  float *fvwgts;                /*!< The floating point vertex weights */
+  int32_t *ivsizes;             /*!< The integer vertex sizes */
+  float *fvsizes;               /*!< The floating point vertex sizes */
+  int32_t *vlabels;             /*!< The labels of the vertices */
+} gk_graph_t;
+
+
+/*-------------------------------------------------------------
+ * The following data structure stores a string as a pair of
+ * a size (len) and the character buffer itself (buf).
+ *-------------------------------------------------------------*/
+typedef struct gk_str_t {
+  size_t len;   /* NOTE(review): the original comment said "allocated buffer" -- string length or buffer capacity? confirm */
+  char *buf;    /* the character data */
+} gk_str_t;
+
+
+
+
+/*-------------------------------------------------------------
+* The following data structure implements a string-2-int mapping
+* table used for parsing command-line options (see gk_GetStringID())
+*-------------------------------------------------------------*/
+typedef struct gk_StringMap_t {
+  char *name;   /* the string side of one mapping entry */
+  int id;       /* the integer that name maps to */
+} gk_StringMap_t;
+
+
+/*------------------------------------------------------------
+ * This structure implements a simple hash table (see the HTable_*() routines)
+ *------------------------------------------------------------*/
+typedef struct gk_HTable_t {
+  int nelements;          /* The overall size of the hash-table */
+  int htsize;             /* The current size of the hash-table */
+  gk_ikv_t *harray;       /* The actual hash-table; entries are gk_ikv_t (int key, ssize_t val) */
+} gk_HTable_t;
+
+
+/*------------------------------------------------------------
+ * This structure implements a gk_Tokens_t list returned by the
+ * string tokenizer (see gk_strtokenize() and gk_freetokenslist())
+ *------------------------------------------------------------*/
+typedef struct gk_Tokens_t {
+  int ntoks;        /* The number of tokens in the input string */
+  char *strbuf;     /* The memory that stores all the entries */
+  char **list;      /* Pointers into strbuf, one for each element */
+} gk_Tokens_t;
+
+
+/*------------------------------------------------------------
+ * This structure implements storage for an atom in a pdb file
+ *------------------------------------------------------------*/
+typedef struct atom {
+  int       serial;
+  char      *name;
+  char	    altLoc;
+  char      *resname;
+  char      chainid;	
+  int       rserial;   /* NOTE(review): presumably the residue serial number -- confirm */
+  char	    icode;
+  char      element;   /* a single char, so only one character of an element symbol fits */
+  double    x;         /* x, y, z: presumably the atom coordinates -- confirm */
+  double    y;
+  double    z;
+  double    opcy;      /* NOTE(review): presumably the occupancy -- confirm against the pdb reader */
+  double    tmpt;      /* NOTE(review): presumably the temperature factor -- confirm */
+} atom;
+
+
+/*------------------------------------------------------------
+ * This structure implements storage for a center of mass for
+ * a single residue.
+ *------------------------------------------------------------*/
+typedef struct center_of_mass {
+  char name;   /* NOTE(review): a single char, presumably the one-letter residue code -- confirm */
+  double x;    /* x, y, z: position of the center of mass */
+  double y;
+  double z;
+} center_of_mass;
+
+
+/*------------------------------------------------------------
+ * This structure implements storage for a pdb protein 
+ *------------------------------------------------------------*/
+typedef struct pdbf {
+	int natoms;			/* Number of atoms */
+	int nresidues;  /* Number of residues based on coordinates */
+	int ncas;       /* NOTE(review): presumably the number of entries in cas -- confirm */
+	int nbbs;       /* NOTE(review): presumably the number of entries in bbs -- confirm */
+	int corruption; /* NOTE(review): meaning not visible here; check the pdb reader */
+	char *resSeq;	      /* Residue sequence based on coordinates    */
+  char **threeresSeq; /* three-letter residue sequence */
+	atom *atoms;    /* the atom records */
+	atom **bbs;     /* pointers to atom records; presumably the backbone atoms -- confirm */
+	atom **cas;     /* pointers to atom records; presumably the C-alpha atoms -- confirm */
+  center_of_mass *cm; /* center_of_mass records (one per residue, per that struct's banner) */
+} pdbf;
+
+
+
+/*************************************************************
+* Localization structure for converting characters to integers (gk_i2cc2i_create_common() returns one for an alphabet)
+**************************************************************/
+typedef struct gk_i2cc2i_t {
+    int n;       /* NOTE(review): presumably the number of symbols in the alphabet -- confirm */
+    char *i2c;   /* presumably the integer-to-character table -- confirm */
+    int *c2i;    /* presumably the character-to-integer table -- confirm */
+} gk_i2cc2i_t;
+ 
+
+/*******************************************************************
+ * This structure implements storage of a protein sequence
+ *******************************************************************/
+typedef struct gk_seq_t {
+    
+    int len; /* Number of residues */
+    int *sequence; /* Stores the sequence as ints (presumably alphabet indices, cf. gk_i2cc2i_t -- confirm) */
+    
+    
+    int **pssm; /* Stores the pssm matrix */
+    int **psfm; /* Stores the psfm matrix */
+    char *name; /* Stores the name of the sequence */
+
+    int nsymbols; /* NOTE(review): presumably the alphabet size -- confirm in seq.c */
+
+    
+} gk_seq_t;
+
+
+
+
+/*************************************************************************/
+/*! The following data structure stores information about a memory 
+    allocation operation that can either be served from gk_mcore_t or by
+    a gk_malloc if not sufficient workspace memory is available. */
+/*************************************************************************/
+typedef struct gk_mop_t {
+  int type;         /* same (type, nbytes, ptr) triple that gk_mcoreAdd() takes; type values are defined elsewhere */
+  ssize_t nbytes;   /* size of the allocation in bytes */
+  void *ptr;        /* the memory the operation refers to */
+} gk_mop_t;
+
+
+/*************************************************************************/
+/*! The following structure defines the mcore for GKlib's customized
+    memory allocations. */
+/*************************************************************************/
+typedef struct gk_mcore_t {
+  /* Workspace information */
+  size_t coresize;     /*!< The amount of core memory that has been allocated */
+  size_t corecpos;     /*!< Index of the first free location in core */
+  void *core;	       /*!< Pointer to the core itself */
+
+  /* These are for implementing a stack-based allocation scheme using both
+     core and also dynamically allocated memory */
+  size_t nmops;         /*!< The number of gk_mop_t entries that have been allocated */
+  size_t cmop;          /*!< Index of the first free location in mops */
+  gk_mop_t *mops;       /*!< The array recording the gk_mop_t operations */
+
+  /* These are for keeping various statistics about the allocations */
+  size_t num_callocs;   /*!< The number of core mallocs */
+  size_t num_hallocs;   /*!< The number of heap mallocs */
+  size_t size_callocs;  /*!< The total # of bytes in core mallocs */
+  size_t size_hallocs;  /*!< The total # of bytes in heap mallocs */
+  size_t cur_callocs;   /*!< The current # of bytes in core mallocs */
+  size_t cur_hallocs;   /*!< The current # of bytes in heap mallocs */
+  size_t max_callocs;   /*!< The maximum # of bytes in core mallocs at any given time */
+  size_t max_hallocs;   /*!< The maximum # of bytes in heap mallocs at any given time */
+
+} gk_mcore_t;
+
+
+/*************************************************************************/
+/*! The following structure is used for cache simulation for performance
+    modeling and analysis (see the gk_cache*() routines). */
+/*************************************************************************/
+typedef struct gk_cache_t {
+  /*! The total cache is nway*(2^(cnbits+lnbits)) bytes */
+  uint32_t nway;        /*!< the associativity of the cache */
+  uint32_t lnbits;      /*!< the number of address bits indexing the cache line */
+  uint32_t cnbits;      /*!< the number of address bits indexing the cache (gk_cacheCreate() takes it as size_t) */
+  size_t csize;         /*!< 2^cnbits */
+  size_t cmask;         /*!< csize-1 */
+
+  uint64_t clock;       /*!< a clock in terms of accesses */
+  
+  uint64_t *latimes;    /*!< a cacheline-level last access time */
+  size_t *clines;       /*!< the cache in terms of cachelines */
+
+  uint64_t nhits;       /*!< counts the number of hits */
+  uint64_t nmisses;     /*!< counts the number of misses */
+} gk_cache_t;
+
+
+#endif
diff --git a/gk_types.h b/gk_types.h
new file mode 100644
index 0000000..57c1191
--- /dev/null
+++ b/gk_types.h
@@ -0,0 +1,38 @@
+/*!
+\file  gk_types.h
+\brief This file contains basic scalar datatype used in GKlib
+
+\date   Started 3/27/2007
+\author George
+\version\verbatim $Id: gk_types.h 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
+*/
+
+#ifndef _GK_TYPES_H_
+#define _GK_TYPES_H_
+
+/*************************************************************************
+* Basic data type definitions. These definitions allow GKlib to separate
+* the following elemental types:
+* - index/loop iterator variables (gk_idx_t), which are set to ssize_t
+* - signed and unsigned int variables that can be set to any # of bits
+* - signed and unsigned long variables that can be set to any # of bits
+* - real variables, which can be set to single or double precision.
+**************************************************************************/
+/*typedef ptrdiff_t       gk_idx_t;       */  /* index variable */
+typedef ssize_t         gk_idx_t;         /* index variable */
+typedef int32_t         gk_int_t;         /* integer values */
+typedef uint32_t        gk_uint_t;        /* unsigned integer values */
+typedef int64_t         gk_long_t;        /* long integer values */
+typedef uint64_t        gk_ulong_t;       /* unsigned long integer values */
+typedef float           gk_real_t;        /* real type */
+typedef double          gk_dreal_t;       /* double precision real type */
+typedef double          gk_wclock_t;	  /* wall-clock time */
+
+/* Largest gk_idx_t value GKlib uses: two below SIZE_MAX>>1 (an earlier variant used PTRDIFF_MAX) */
+#define GK_IDX_MAX ((SIZE_MAX>>1)-2)
+/* printf/scanf length+conversion for gk_idx_t: "zd" is the signed counterpart of size_t's "zu" */
+#define PRIGKIDX "zd"
+#define SCNGKIDX "zd"
+
+
+#endif
diff --git a/gk_util.c b/gk_util.c
new file mode 100644
index 0000000..e1e68db
--- /dev/null
+++ b/gk_util.c
@@ -0,0 +1,107 @@
+/*!
+\file  util.c
+\brief Various utility routines
+
+\date   Started 4/12/2007
+\author George
+\version\verbatim $Id: gk_util.c 16223 2014-02-15 21:34:09Z karypis $ \endverbatim
+*/
+
+
+#include <GKlib.h>
+
+
+/*************************************************************************
+* Shuffles p[0..n-1] in place by performing n/2 swaps of two randomly
+* drawn positions. When flag == 1 the array is first reset to the identity
+* (p[i] = i); for any other flag value its current contents are shuffled.
+**************************************************************************/
+void gk_RandomPermute(size_t n, int *p, int flag)
+{
+  size_t k, first, second, nswaps = n/2;
+  int held;
+
+  /* optional identity initialization */
+  for (k=0; flag == 1 && k<n; k++)
+    p[k] = k;
+
+  /* the two positions are drawn in the same order as before (first, then second) */
+  for (k=0; k<nswaps; k++) {
+    first  = RandomInRange(n);
+    second = RandomInRange(n);
+    gk_SWAP(p[first], p[second], held);
+  }
+}
+
+
+/************************************************************************/
+/*!
+\brief Converts an element-based set membership into a CSR-format set-based
+       membership.
+
+For example, it takes an array such as part[] that stores where each 
+element belongs to and returns a pair of arrays (pptr[], pind[]) that 
+store in CSR format the list of elements belonging in each partition.
+
+\param n      
+  the number of elements in the array (e.g., # of vertices)
+\param range  
+  the cardinality of the set (e.g., # of partitions)
+\param array
+  the array that stores the per-element set membership; its values index ptr directly
+\param ptr
+  the array that will store the starting indices in ind for
+  the elements of each set. This is filled by the routine and
+  its size should be at least range+1.
+\param ind
+  the array that stores consecutively which elements belong to
+  each set. The size of this array should be n.
+*/
+/************************************************************************/
+void gk_array2csr(size_t n, size_t range, int *array, int *ptr, int *ind)
+{
+  size_t i;
+
+  gk_iset(range+1, 0, ptr);   /* NOTE(review): presumably zero-fills ptr[0..range]; gk_iset is defined elsewhere */
+
+  for (i=0; i<n; i++) 
+    ptr[array[i]]++;          /* count the members of each set; array[i] is not range-checked */
+
+  /* Compute the ptr, ind structure */
+  MAKECSR(i, range, ptr);     /* NOTE(review): presumably turns the counts into start offsets -- confirm macro */
+  for (i=0; i<n; i++)
+    ind[ptr[array[i]]++] = i; /* put element i in its set's next free slot and advance that set's cursor */
+  SHIFTCSR(i, range, ptr);    /* NOTE(review): presumably restores the start offsets advanced above -- confirm macro */
+}
+
+
+/*************************************************************************
+* Returns floor(log2(a)) for a >= 1, and 0 for any a <= 1.
+**************************************************************************/
+int gk_log2(int a)
+{
+  int nshifts = 0;
+
+  for (; a > 1; a >>= 1) nshifts++;   /* one right shift per counted bit position */
+  return nshifts;
+}
+
+
+/*************************************************************************
+* Returns 1 when a is an exact power of two (1, 2, 4, ...) and 0 otherwise.
+**************************************************************************/
+int gk_ispow2(int a)
+{
+  return ((1 << gk_log2(a)) == a);
+}
+
+
+/*************************************************************************
+* Returns the base-2 logarithm of a, computed as log(a)/log(2.0).
+**************************************************************************/
+float gk_flog2(float a)
+{
+  return (float)(log((double)a) / log(2.0));
+}
+
+
diff --git a/gkregex.c b/gkregex.c
new file mode 100644
index 0000000..8a09caa
--- /dev/null
+++ b/gkregex.c
@@ -0,0 +1,10704 @@
+/* Extended regular expression matching and search library.
+   Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+/* Dummy function to silence a compiler warning (presumably "empty translation unit" when USE_GKREGEX is undefined) */
+void gkfooo() { return; }
+
+#ifdef USE_GKREGEX
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef _LIBC
+/* We have to keep the namespace clean.  */
+# define regfree(preg) __regfree (preg)
+# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
+# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
+# define regerror(errcode, preg, errbuf, errbuf_size) \
+	__regerror(errcode, preg, errbuf, errbuf_size)
+# define re_set_registers(bu, re, nu, st, en) \
+	__re_set_registers (bu, re, nu, st, en)
+# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
+	__re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
+# define re_match(bufp, string, size, pos, regs) \
+	__re_match (bufp, string, size, pos, regs)
+# define re_search(bufp, string, size, startpos, range, regs) \
+	__re_search (bufp, string, size, startpos, range, regs)
+# define re_compile_pattern(pattern, length, bufp) \
+	__re_compile_pattern (pattern, length, bufp)
+# define re_set_syntax(syntax) __re_set_syntax (syntax)
+# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
+	__re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
+# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
+
+# include "../locale/localeinfo.h"
+#endif
+
+#include "GKlib.h"
+
+
+/******************************************************************************/
+/******************************************************************************/
+/******************************************************************************/
+/* GKINCLUDE #include "regex_internal.h" */
+/******************************************************************************/
+/******************************************************************************/
+/******************************************************************************/
+/* Extended regular expression matching and search library.
+   Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#ifndef _REGEX_INTERNAL_H
+#define _REGEX_INTERNAL_H 1
+
+#include <assert.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if defined(__MINGW32_VERSION) || defined(_MSC_VER)
+#define strcasecmp stricmp
+#endif
+
+#if defined HAVE_LANGINFO_H || defined HAVE_LANGINFO_CODESET || defined _LIBC
+# include <langinfo.h>
+#endif
+#if defined HAVE_LOCALE_H || defined _LIBC
+# include <locale.h>
+#endif
+#if defined HAVE_WCHAR_H || defined _LIBC
+# include <wchar.h>
+#endif /* HAVE_WCHAR_H || _LIBC */
+#if defined HAVE_WCTYPE_H || defined _LIBC
+# include <wctype.h>
+#endif /* HAVE_WCTYPE_H || _LIBC */
+#if defined HAVE_STDBOOL_H || defined _LIBC
+# include <stdbool.h>
+#else
+typedef enum { false, true } bool;
+#endif /* HAVE_STDBOOL_H || _LIBC */
+#if defined HAVE_STDINT_H || defined _LIBC
+# include <stdint.h>
+#endif /* HAVE_STDINT_H || _LIBC */
+#if defined _LIBC
+# include <bits/libc-lock.h>
+#else
+# define __libc_lock_define(CLASS,NAME)
+# define __libc_lock_init(NAME) do { } while (0)
+# define __libc_lock_lock(NAME) do { } while (0)
+# define __libc_lock_unlock(NAME) do { } while (0)
+#endif
+
+/* In case that the system doesn't have isblank().  */
+#if !defined _LIBC && !defined HAVE_ISBLANK && !defined isblank
+# define isblank(ch) ((ch) == ' ' || (ch) == '\t')
+#endif
+
+#ifdef _LIBC
+# ifndef _RE_DEFINE_LOCALE_FUNCTIONS
+#  define _RE_DEFINE_LOCALE_FUNCTIONS 1
+#   include <locale/localeinfo.h>
+#   include <locale/elem-hash.h>
+#   include <locale/coll-lookup.h>
+# endif
+#endif
+
+/* This is for other GNU distributions with internationalized messages.  */
+#if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC
+# include <libintl.h>
+# ifdef _LIBC
+#  undef gettext
+#  define gettext(msgid) \
+  INTUSE(__dcgettext) (_libc_intl_domainname, msgid, LC_MESSAGES)
+# endif
+#else
+# define gettext(msgid) (msgid)
+#endif
+
+#ifndef gettext_noop
+/* This define is so xgettext can find the internationalizable
+   strings.  */
+# define gettext_noop(String) String
+#endif
+
+/* For loser systems without the definition.  */
+#ifndef SIZE_MAX
+# define SIZE_MAX ((size_t) -1)
+#endif
+
+#if (defined MB_CUR_MAX && HAVE_LOCALE_H && HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_WCRTOMB && HAVE_MBRTOWC && HAVE_WCSCOLL) || _LIBC
+# define RE_ENABLE_I18N
+#endif
+
+#if __GNUC__ >= 3
+# define BE(expr, val) __builtin_expect (expr, val)
+#else
+# define BE(expr, val) (expr)
+# define inline
+#endif
+
+/* Number of single byte character.  */
+#define SBC_MAX 256
+
+#define COLL_ELEM_LEN_MAX 8
+
+/* The character which represents newline.  */
+#define NEWLINE_CHAR '\n'
+#define WIDE_NEWLINE_CHAR L'\n'
+
+/* Rename to standard API for using out of glibc.  */
+#ifndef _LIBC
+# define __wctype wctype
+# define __iswctype iswctype
+# define __btowc btowc
+# define __mempcpy mempcpy
+# define __wcrtomb wcrtomb
+# define __regfree regfree
+# define attribute_hidden
+#endif /* not _LIBC */
+
+#ifdef __GNUC__
+# define __attribute(arg) __attribute__ (arg)
+#else
+# define __attribute(arg)
+#endif
+
+extern const char __re_error_msgid[] attribute_hidden;
+extern const size_t __re_error_msgid_idx[] attribute_hidden;
+
+/* An integer used to represent a set of bits.  It must be unsigned,
+   and must be at least as wide as unsigned int.  */
+typedef unsigned long int bitset_word_t;
+/* All bits set in a bitset_word_t.  */
+#define BITSET_WORD_MAX ULONG_MAX
+/* Number of bits in a bitset_word_t.  */
+#define BITSET_WORD_BITS (sizeof (bitset_word_t) * CHAR_BIT)
+/* Number of bitset_word_t in a bit_set.  */
+#define BITSET_WORDS (SBC_MAX / BITSET_WORD_BITS)
+typedef bitset_word_t bitset_t[BITSET_WORDS];
+typedef bitset_word_t *re_bitset_ptr_t;
+typedef const bitset_word_t *re_const_bitset_ptr_t;
+
+#define bitset_set(set,i) /* turn bit I of SET on */ \
+  ((set)[(i) / BITSET_WORD_BITS] |= (bitset_word_t) 1 << (i) % BITSET_WORD_BITS)
+#define bitset_clear(set,i) /* turn bit I of SET off */ \
+  ((set)[(i) / BITSET_WORD_BITS] &= ~((bitset_word_t) 1 << (i) % BITSET_WORD_BITS))
+#define bitset_contain(set,i) /* nonzero iff bit I of SET is on */ \
+  ((set)[(i) / BITSET_WORD_BITS] & ((bitset_word_t) 1 << (i) % BITSET_WORD_BITS))
+#define bitset_empty(set) memset (set, '\0', sizeof (bitset_t))	/* clear every bit */
+#define bitset_set_all(set) memset (set, '\xff', sizeof (bitset_t))	/* set every bit */
+#define bitset_copy(dest,src) memcpy (dest, src, sizeof (bitset_t))	/* DEST = SRC */
+
+#define PREV_WORD_CONSTRAINT 0x0001
+#define PREV_NOTWORD_CONSTRAINT 0x0002
+#define NEXT_WORD_CONSTRAINT 0x0004
+#define NEXT_NOTWORD_CONSTRAINT 0x0008
+#define PREV_NEWLINE_CONSTRAINT 0x0010
+#define NEXT_NEWLINE_CONSTRAINT 0x0020
+#define PREV_BEGBUF_CONSTRAINT 0x0040
+#define NEXT_ENDBUF_CONSTRAINT 0x0080
+#define WORD_DELIM_CONSTRAINT 0x0100
+#define NOT_WORD_DELIM_CONSTRAINT 0x0200
+
+typedef enum
+{
+  INSIDE_WORD = PREV_WORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
+  WORD_FIRST = PREV_NOTWORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
+  WORD_LAST = PREV_WORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
+  INSIDE_NOTWORD = PREV_NOTWORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
+  LINE_FIRST = PREV_NEWLINE_CONSTRAINT,
+  LINE_LAST = NEXT_NEWLINE_CONSTRAINT,
+  BUF_FIRST = PREV_BEGBUF_CONSTRAINT,
+  BUF_LAST = NEXT_ENDBUF_CONSTRAINT,
+  WORD_DELIM = WORD_DELIM_CONSTRAINT,
+  NOT_WORD_DELIM = NOT_WORD_DELIM_CONSTRAINT
+} re_context_type;
+
+typedef struct
+{
+  int alloc;	/* presumably the allocated capacity of ELEMS -- confirm */
+  int nelem;	/* element count; re_node_set_empty() resets it to 0 */
+  int *elems;	/* element storage; released by re_node_set_free() */
+} re_node_set;
+
+typedef enum
+{
+  NON_TYPE = 0,
+
+  /* Node type, These are used by token, node, tree.  */
+  CHARACTER = 1,
+  END_OF_RE = 2,
+  SIMPLE_BRACKET = 3,
+  OP_BACK_REF = 4,
+  OP_PERIOD = 5,
+#ifdef RE_ENABLE_I18N
+  COMPLEX_BRACKET = 6,
+  OP_UTF8_PERIOD = 7,
+#endif /* RE_ENABLE_I18N */
+
+  /* We define EPSILON_BIT as a macro so that OP_OPEN_SUBEXP is used
+     when the debugger shows values of this enum type.  */
+#define EPSILON_BIT 8
+  OP_OPEN_SUBEXP = EPSILON_BIT | 0,
+  OP_CLOSE_SUBEXP = EPSILON_BIT | 1,
+  OP_ALT = EPSILON_BIT | 2,
+  OP_DUP_ASTERISK = EPSILON_BIT | 3,
+  ANCHOR = EPSILON_BIT | 4,
+
+  /* Tree type, these are used only by tree. */
+  CONCAT = 16,
+  SUBEXP = 17,
+
+  /* Token type, these are used only by token.  */
+  OP_DUP_PLUS = 18,
+  OP_DUP_QUESTION,
+  OP_OPEN_BRACKET,
+  OP_CLOSE_BRACKET,
+  OP_CHARSET_RANGE,
+  OP_OPEN_DUP_NUM,
+  OP_CLOSE_DUP_NUM,
+  OP_NON_MATCH_LIST,
+  OP_OPEN_COLL_ELEM,
+  OP_CLOSE_COLL_ELEM,
+  OP_OPEN_EQUIV_CLASS,
+  OP_CLOSE_EQUIV_CLASS,
+  OP_OPEN_CHAR_CLASS,
+  OP_CLOSE_CHAR_CLASS,
+  OP_WORD,
+  OP_NOTWORD,
+  OP_SPACE,
+  OP_NOTSPACE,
+  BACK_SLASH
+
+} re_token_type_t;
+
+#ifdef RE_ENABLE_I18N
+typedef struct
+{
+  /* Multibyte characters.  */
+  wchar_t *mbchars;
+
+  /* Collating symbols.  */
+# ifdef _LIBC
+  int32_t *coll_syms;
+# endif
+
+  /* Equivalence classes. */
+# ifdef _LIBC
+  int32_t *equiv_classes;
+# endif
+
+  /* Range expressions. */
+# ifdef _LIBC
+  uint32_t *range_starts;
+  uint32_t *range_ends;
+# else /* not _LIBC */
+  wchar_t *range_starts;
+  wchar_t *range_ends;
+# endif /* not _LIBC */
+
+  /* Character classes. */
+  wctype_t *char_classes;
+
+  /* If this character set is the non-matching list.  */
+  unsigned int non_match : 1;
+
+  /* # of multibyte characters.  */
+  int nmbchars;
+
+  /* # of collating symbols.  */
+  int ncoll_syms;
+
+  /* # of equivalence classes. */
+  int nequiv_classes;
+
+  /* # of range expressions. */
+  int nranges;
+
+  /* # of character classes. */
+  int nchar_classes;
+} re_charset_t;
+#endif /* RE_ENABLE_I18N */
+
+typedef struct
+{
+  union
+  {
+    unsigned char c;		/* for CHARACTER */
+    re_bitset_ptr_t sbcset;	/* for SIMPLE_BRACKET */
+#ifdef RE_ENABLE_I18N
+    re_charset_t *mbcset;	/* for COMPLEX_BRACKET */
+#endif /* RE_ENABLE_I18N */
+    int idx;			/* for BACK_REF */
+    re_context_type ctx_type;	/* for ANCHOR */
+  } opr;
+#if __GNUC__ >= 2
+  re_token_type_t type : 8;
+#else
+  re_token_type_t type;
+#endif
+  unsigned int constraint : 10;	/* context constraint */
+  unsigned int duplicated : 1;
+  unsigned int opt_subexp : 1;
+#ifdef RE_ENABLE_I18N
+  unsigned int accept_mb : 1;
+  /* These 2 bits can be moved into the union if needed (e.g. if running out
+     of bits; move opr.c to opr.c.c and move the flags to opr.c.flags).  */
+  unsigned int mb_partial : 1;
+#endif
+  unsigned int word_char : 1;
+} re_token_t;
+
+#define IS_EPSILON_NODE(type) ((type) & EPSILON_BIT)
+
+struct re_string_t
+{
+  /* Indicate the raw buffer which is the original string passed as an
+     argument of regexec(), re_search(), etc..  */
+  const unsigned char *raw_mbs;
+  /* Store the multibyte string.  In case of "case insensitive mode" like
+     REG_ICASE, upper cases of the string are stored, otherwise MBS points
+     the same address that RAW_MBS points.  */
+  unsigned char *mbs;
+#ifdef RE_ENABLE_I18N
+  /* Store the wide character string which is corresponding to MBS.  */
+  wint_t *wcs;
+  int *offsets;
+  mbstate_t cur_state;
+#endif
+  /* Index in RAW_MBS.  Each character mbs[i] corresponds to
+     raw_mbs[raw_mbs_idx + i].  */
+  int raw_mbs_idx;
+  /* The length of the valid characters in the buffers.  */
+  int valid_len;
+  /* The corresponding number of bytes in raw_mbs array.  */
+  int valid_raw_len;
+  /* The length of the buffers MBS and WCS.  */
+  int bufs_len;
+  /* The index in MBS, which is updated by re_string_fetch_byte.  */
+  int cur_idx;
+  /* length of RAW_MBS array.  */
+  int raw_len;
+  /* This is RAW_LEN - RAW_MBS_IDX + VALID_LEN - VALID_RAW_LEN.  */
+  int len;
+  /* End of the buffer may be shorter than its length in the cases such
+     as re_match_2, re_search_2.  Then, we use STOP for end of the buffer
+     instead of LEN.  */
+  int raw_stop;
+  /* This is RAW_STOP - RAW_MBS_IDX adjusted through OFFSETS.  */
+  int stop;
+
+  /* The context of mbs[0].  We store the context independently, since
+     the context of mbs[0] may be different from raw_mbs[0], which is
+     the beginning of the input string.  */
+  unsigned int tip_context;
+  /* The translation passed as a part of an argument of re_compile_pattern.  */
+  RE_TRANSLATE_TYPE trans;
+  /* Copy of re_dfa_t's word_char.  */
+  re_const_bitset_ptr_t word_char;
+  /* 1 if REG_ICASE.  */
+  unsigned char icase;
+  unsigned char is_utf8;
+  unsigned char map_notascii;
+  unsigned char mbs_allocated;
+  unsigned char offsets_needed;
+  unsigned char newline_anchor;
+  unsigned char word_ops_used;
+  int mb_cur_max;
+};
+typedef struct re_string_t re_string_t;
+
+
+struct re_dfa_t;
+typedef struct re_dfa_t re_dfa_t;
+
+#ifndef _LIBC
+# ifdef __i386__
+#  define internal_function   __attribute ((regparm (3), stdcall))
+# else
+#  define internal_function
+# endif
+#endif
+
+static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr,
+						int new_buf_len)
+     internal_function;
+#ifdef RE_ENABLE_I18N
+static void build_wcs_buffer (re_string_t *pstr) internal_function;
+static int build_wcs_upper_buffer (re_string_t *pstr) internal_function;
+#endif /* RE_ENABLE_I18N */
+static void build_upper_buffer (re_string_t *pstr) internal_function;
+static void re_string_translate_buffer (re_string_t *pstr) internal_function;
+static unsigned int re_string_context_at (const re_string_t *input, int idx,
+					  int eflags)
+     internal_function __attribute ((pure));
+#define re_string_peek_byte(pstr, offset) /* byte OFFSET positions from cur_idx; cur_idx is not modified */ \
+  ((pstr)->mbs[(pstr)->cur_idx + (offset)])
+#define re_string_fetch_byte(pstr) \
+  ((pstr)->mbs[(pstr)->cur_idx++])
+#define re_string_first_byte(pstr, idx) \
+  ((idx) == (pstr)->valid_len || (pstr)->wcs[idx] != WEOF)
+#define re_string_is_single_byte_char(pstr, idx) \
+  ((pstr)->wcs[idx] != WEOF && ((pstr)->valid_len == (idx) + 1 \
+				|| (pstr)->wcs[(idx) + 1] != WEOF))
+#define re_string_eoi(pstr) ((pstr)->stop <= (pstr)->cur_idx)
+#define re_string_cur_idx(pstr) ((pstr)->cur_idx)
+#define re_string_get_buffer(pstr) ((pstr)->mbs)
+#define re_string_length(pstr) ((pstr)->len)
+#define re_string_byte_at(pstr,idx) ((pstr)->mbs[idx])
+#define re_string_skip_bytes(pstr,idx) ((pstr)->cur_idx += (idx))
+#define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx))
+
+#ifdef __GNUC__
+# define alloca(size)   __builtin_alloca (size)
+# define HAVE_ALLOCA 1
+#elif defined(_MSC_VER)
+# include <malloc.h>
+# define alloca _alloca
+# define HAVE_ALLOCA 1
+#else
+# error No alloca()
+#endif
+
+#ifndef _LIBC
+# if HAVE_ALLOCA
+/* The OS usually guarantees only one guard page at the bottom of the stack,
+   and a page size can be as small as 4096 bytes.  So we cannot safely
+   allocate anything larger than 4096 bytes.  Also care for the possibility
+   of a few compiler-allocated temporary stack slots.  */
+#  define __libc_use_alloca(n) ((n) < 4032)
+# else
+/* alloca is implemented with malloc, so just use malloc.  */
+#  define __libc_use_alloca(n) 0
+# endif
+#endif
+
+#define re_malloc(t,n) ((t *) malloc ((n) * sizeof (t)))
+#define re_realloc(p,t,n) ((t *) realloc (p, (n) * sizeof (t)))
+#define re_free(p) free (p)
+
+struct bin_tree_t
+{
+  struct bin_tree_t *parent;
+  struct bin_tree_t *left;
+  struct bin_tree_t *right;
+  struct bin_tree_t *first;
+  struct bin_tree_t *next;
+
+  re_token_t token;
+
+  /* `node_idx' is the index in dfa->nodes, if `type' == 0.
+     Otherwise `type' indicate the type of this node.  */
+  int node_idx;
+};
+typedef struct bin_tree_t bin_tree_t;
+
+#define BIN_TREE_STORAGE_SIZE \
+  ((1024 - sizeof (void *)) / sizeof (bin_tree_t))
+
+struct bin_tree_storage_t
+{
+  struct bin_tree_storage_t *next;
+  bin_tree_t data[BIN_TREE_STORAGE_SIZE];
+};
+typedef struct bin_tree_storage_t bin_tree_storage_t;
+
+#define CONTEXT_WORD 1
+#define CONTEXT_NEWLINE (CONTEXT_WORD << 1)
+#define CONTEXT_BEGBUF (CONTEXT_NEWLINE << 1)
+#define CONTEXT_ENDBUF (CONTEXT_BEGBUF << 1)
+
+#define IS_WORD_CONTEXT(c) ((c) & CONTEXT_WORD)
+#define IS_NEWLINE_CONTEXT(c) ((c) & CONTEXT_NEWLINE)
+#define IS_BEGBUF_CONTEXT(c) ((c) & CONTEXT_BEGBUF)
+#define IS_ENDBUF_CONTEXT(c) ((c) & CONTEXT_ENDBUF)
+#define IS_ORDINARY_CONTEXT(c) ((c) == 0)
+
+#define IS_WORD_CHAR(ch) (isalnum (ch) || (ch) == '_')
+#define IS_NEWLINE(ch) ((ch) == NEWLINE_CHAR)
+#define IS_WIDE_WORD_CHAR(ch) (iswalnum (ch) || (ch) == L'_')
+#define IS_WIDE_NEWLINE(ch) ((ch) == WIDE_NEWLINE_CHAR)
+
+#define NOT_SATISFY_PREV_CONSTRAINT(constraint,context) /* nonzero iff a PREV_* bit of CONSTRAINT is violated by CONTEXT */ \
+ ((((constraint) & PREV_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
+  || (((constraint) & PREV_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
+  || (((constraint) & PREV_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context))\
+  || (((constraint) & PREV_BEGBUF_CONSTRAINT) && !IS_BEGBUF_CONTEXT (context)))
+
+#define NOT_SATISFY_NEXT_CONSTRAINT(constraint,context) \
+ ((((constraint) & NEXT_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
+  || (((constraint) & NEXT_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
+  || (((constraint) & NEXT_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context)) \
+  || (((constraint) & NEXT_ENDBUF_CONSTRAINT) && !IS_ENDBUF_CONTEXT (context)))
+
+struct re_dfastate_t
+{
+  unsigned int hash;
+  re_node_set nodes;
+  re_node_set non_eps_nodes;
+  re_node_set inveclosure;
+  re_node_set *entrance_nodes;
+  struct re_dfastate_t **trtable, **word_trtable;
+  unsigned int context : 4;
+  unsigned int halt : 1;
+  /* If this state can accept `multi byte'.
+     Note that we refer to multibyte characters, and multi character
+     collating elements as `multi byte'.  */
+  unsigned int accept_mb : 1;
+  /* If this state has backreference node(s).  */
+  unsigned int has_backref : 1;
+  unsigned int has_constraint : 1;
+};
+typedef struct re_dfastate_t re_dfastate_t;
+
+struct re_state_table_entry
+{
+  int num;
+  int alloc;
+  re_dfastate_t **array;
+};
+
+/* Array type used in re_sub_match_last_t and re_sub_match_top_t.  */
+
+typedef struct
+{
+  int next_idx;
+  int alloc;
+  re_dfastate_t **array;
+} state_array_t;
+
+/* Store information about the node NODE whose type is OP_CLOSE_SUBEXP.  */
+
+typedef struct
+{
+  int node;
+  int str_idx; /* The position NODE match at.  */
+  state_array_t path;
+} re_sub_match_last_t;
+
+/* Store information about the node NODE whose type is OP_OPEN_SUBEXP.
+   And information about the node, whose type is OP_CLOSE_SUBEXP,
+   corresponding to NODE is stored in LASTS.  */
+
+typedef struct
+{
+  int str_idx;
+  int node;
+  state_array_t *path;
+  int alasts; /* Allocation size of LASTS.  */
+  int nlasts; /* The number of LASTS.  */
+  re_sub_match_last_t **lasts;
+} re_sub_match_top_t;
+
+struct re_backref_cache_entry
+{
+  int node;
+  int str_idx;
+  int subexp_from;
+  int subexp_to;
+  char more;
+  char unused;
+  unsigned short int eps_reachable_subexps_map;
+};
+
+typedef struct
+{
+  /* The string object corresponding to the input string.  */
+  re_string_t input;
+#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
+  const re_dfa_t *const dfa;
+#else
+  const re_dfa_t *dfa;
+#endif
+  /* EFLAGS of the argument of regexec.  */
+  int eflags;
+  /* Where the matching ends.  */
+  int match_last;
+  int last_node;
+  /* The state log used by the matcher.  */
+  re_dfastate_t **state_log;
+  int state_log_top;
+  /* Back reference cache.  */
+  int nbkref_ents;
+  int abkref_ents;
+  struct re_backref_cache_entry *bkref_ents;
+  int max_mb_elem_len;
+  int nsub_tops;
+  int asub_tops;
+  re_sub_match_top_t **sub_tops;
+} re_match_context_t;
+
+typedef struct
+{
+  re_dfastate_t **sifted_states;
+  re_dfastate_t **limited_states;
+  int last_node;
+  int last_str_idx;
+  re_node_set limits;
+} re_sift_context_t;
+
+struct re_fail_stack_ent_t
+{
+  int idx;
+  int node;
+  regmatch_t *regs;
+  re_node_set eps_via_nodes;
+};
+
+struct re_fail_stack_t
+{
+  int num;
+  int alloc;
+  struct re_fail_stack_ent_t *stack;
+};
+
+struct re_dfa_t
+{
+  re_token_t *nodes;
+  size_t nodes_alloc;
+  size_t nodes_len;
+  int *nexts;
+  int *org_indices;
+  re_node_set *edests;
+  re_node_set *eclosures;
+  re_node_set *inveclosures;
+  struct re_state_table_entry *state_table;
+  re_dfastate_t *init_state;
+  re_dfastate_t *init_state_word;
+  re_dfastate_t *init_state_nl;
+  re_dfastate_t *init_state_begbuf;
+  bin_tree_t *str_tree;
+  bin_tree_storage_t *str_tree_storage;
+  re_bitset_ptr_t sb_char;
+  int str_tree_storage_idx;
+
+  /* number of subexpressions `re_nsub' is in regex_t.  */
+  unsigned int state_hash_mask;
+  int init_node;
+  int nbackref; /* The number of backreference in this dfa.  */
+
+  /* Bitmap expressing which backreference is used.  */
+  bitset_word_t used_bkref_map;
+  bitset_word_t completed_bkref_map;
+
+  unsigned int has_plural_match : 1;
+  /* If this dfa has "multibyte node", which is a backreference or
+     a node which can accept multibyte character or multi character
+     collating element.  */
+  unsigned int has_mb_node : 1;
+  unsigned int is_utf8 : 1;
+  unsigned int map_notascii : 1;
+  unsigned int word_ops_used : 1;
+  int mb_cur_max;
+  bitset_t word_char;
+  reg_syntax_t syntax;
+  int *subexp_map;
+#ifdef DEBUG
+  char* re_str;
+#endif
+  __libc_lock_define (, lock)
+};
+
+#define re_node_set_init_empty(set) memset (set, '\0', sizeof (re_node_set))
+#define re_node_set_remove(set,id) \
+  (re_node_set_remove_at (set, re_node_set_contains (set, id) - 1))
+#define re_node_set_empty(p) ((p)->nelem = 0)
+#define re_node_set_free(set) re_free ((set)->elems)
+
+
+typedef enum
+{
+  SB_CHAR,
+  MB_CHAR,
+  EQUIV_CLASS,
+  COLL_SYM,
+  CHAR_CLASS
+} bracket_elem_type;
+
+typedef struct
+{
+  bracket_elem_type type;
+  union
+  {
+    unsigned char ch;
+    unsigned char *name;
+    wchar_t wch;
+  } opr;
+} bracket_elem_t;
+
+
+/* Inline word-at-a-time helpers for bitset_t; this one complements SET in place.  */
+static inline void
+bitset_not (bitset_t set)
+{
+  int word = BITSET_WORDS;
+  while (word-- > 0)
+    set[word] = ~set[word];
+}
+
+static inline void
+bitset_merge (bitset_t dest, const bitset_t src)
+{
+  int word = BITSET_WORDS;	/* DEST |= SRC, one word at a time */
+  while (word-- > 0)
+    dest[word] |= src[word];
+}
+
+static inline void
+bitset_mask (bitset_t dest, const bitset_t src)
+{
+  int word = BITSET_WORDS;	/* DEST &= SRC, one word at a time */
+  while (word-- > 0)
+    dest[word] &= src[word];
+}
+
+#ifdef RE_ENABLE_I18N
+/* Inline functions for re_string.  */
+static inline int
+internal_function __attribute ((pure))
+re_string_char_size_at (const re_string_t *pstr, int idx)
+{
+  /* Byte length of the character at IDX: 1 plus the run of WEOF entries
+     that follows it in WCS (always 1 when mb_cur_max is 1).  */
+  int nbytes = 1;
+  if (pstr->mb_cur_max != 1)
+    while (idx + nbytes < pstr->valid_len && pstr->wcs[idx + nbytes] == WEOF)
+      ++nbytes;
+  return nbytes;
+}
+
+static inline wint_t
+internal_function __attribute ((pure))
+re_string_wchar_at (const re_string_t *pstr, int idx)
+{
+  /* When mb_cur_max is 1 the byte itself is returned and WCS is not read.  */
+  return (pstr->mb_cur_max == 1) ? (wint_t) pstr->mbs[idx]
+				 : (wint_t) pstr->wcs[idx];
+}
+
+static int
+internal_function __attribute ((pure))
+re_string_elem_size_at (const re_string_t *pstr, int idx)
+{ /* Byte length of the element of MBS starting at IDX (presumably a collating element -- confirm).  */
+# ifdef _LIBC
+  const unsigned char *p, *extra;
+  const int32_t *table, *indirect;	/* not read below; presumably used by the code pulled in from weight.h */
+  int32_t tmp;
+#  include <locale/weight.h>
+  uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+
+  if (nrules != 0)
+    {
+      table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
+      extra = (const unsigned char *)
+	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
+      indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
+						_NL_COLLATE_INDIRECTMB);
+      p = pstr->mbs + idx;
+      tmp = findidx (&p);	/* presumably advances P past the element; TMP itself is not used */
+      return p - pstr->mbs - idx;	/* number of bytes P moved */
+    }
+  else
+# endif /* _LIBC */
+    return 1;	/* outside _LIBC, or when NRULES is 0 */
+}
+#endif /* RE_ENABLE_I18N */
+
+#endif /*  _REGEX_INTERNAL_H */
+
+/******************************************************************************/
+/******************************************************************************/
+/******************************************************************************/
+/* GKINCLUDE #include "regex_internal.c" */
+/******************************************************************************/
+/******************************************************************************/
+/******************************************************************************/
+/* Extended regular expression matching and search library.
+   Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+static void re_string_construct_common (const char *str, int len,
+					re_string_t *pstr,
+					RE_TRANSLATE_TYPE trans, int icase,
+					const re_dfa_t *dfa) internal_function;
+static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
+					  const re_node_set *nodes,
+					  unsigned int hash) internal_function;
+static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
+					  const re_node_set *nodes,
+					  unsigned int context,
+					  unsigned int hash) internal_function;
+
+/* Functions for string operation.  */
+
+/* Set up PSTR for the string STR of length LEN and allocate its buffers.
+   It is necessary to call re_string_reconstruct before using the object.  */
+
+static reg_errcode_t
+internal_function
+re_string_allocate (re_string_t *pstr, const char *str, int len, int init_len,
+		    RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa)
+{
+  reg_errcode_t ret;
+  int init_buf_len;
+
+  /* Ensure at least one character fits into the buffers.  */
+  if (init_len < dfa->mb_cur_max)
+    init_len = dfa->mb_cur_max;
+  init_buf_len = (len + 1 < init_len) ? len + 1: init_len;	/* min (len + 1, init_len) */
+  re_string_construct_common (str, len, pstr, trans, icase, dfa);	/* NOTE(review): PSTR is not zeroed here, unlike re_string_construct -- confirm callers do it */
+
+  ret = re_string_realloc_buffers (pstr, init_buf_len);
+  if (BE (ret != REG_NOERROR, 0))
+    return ret;
+
+  pstr->word_char = dfa->word_char;
+  pstr->word_ops_used = dfa->word_ops_used;
+  pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;	/* no private buffer: read STR in place */
+  pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len;	/* 0 when the buffers still have to be built */
+  pstr->valid_raw_len = pstr->valid_len;
+  return REG_NOERROR;
+}
+
+/* Zero PSTR, allocate its buffers for STR/LEN, and build their initial contents; returns REG_NOERROR or an error code.  */
+
+static reg_errcode_t
+internal_function
+re_string_construct (re_string_t *pstr, const char *str, int len,
+		     RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa)
+{
+  reg_errcode_t ret;
+  memset (pstr, '\0', sizeof (re_string_t));	/* start from an all-zero object */
+  re_string_construct_common (str, len, pstr, trans, icase, dfa);
+
+  if (len > 0)	/* for an empty string no buffer is allocated and bufs_len stays 0 */
+    {
+      ret = re_string_realloc_buffers (pstr, len + 1);
+      if (BE (ret != REG_NOERROR, 0))
+	return ret;
+    }
+  pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;	/* no private buffer: read STR in place */
+
+  if (icase)
+    {
+#ifdef RE_ENABLE_I18N
+      if (dfa->mb_cur_max > 1)
+	{
+	  while (1)	/* build, growing the buffers until one of the breaks below fires */
+	    {
+	      ret = build_wcs_upper_buffer (pstr);
+	      if (BE (ret != REG_NOERROR, 0))
+		return ret;
+	      if (pstr->valid_raw_len >= len)	/* all LEN raw bytes are covered */
+		break;
+	      if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)	/* more than mb_cur_max slots remain unused: do not grow */
+		break;
+	      ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);	/* otherwise double the buffers and retry */
+	      if (BE (ret != REG_NOERROR, 0))
+		return ret;
+	    }
+	}
+      else
+#endif /* RE_ENABLE_I18N  */
+	build_upper_buffer (pstr);
+    }
+  else
+    {
+#ifdef RE_ENABLE_I18N
+      if (dfa->mb_cur_max > 1)
+	build_wcs_buffer (pstr);
+      else
+#endif /* RE_ENABLE_I18N  */
+	{
+	  if (trans != NULL)
+	    re_string_translate_buffer (pstr);
+	  else
+	    {
+	      pstr->valid_len = pstr->bufs_len;	/* nothing to convert: mark the whole buffer length as valid */
+	      pstr->valid_raw_len = pstr->bufs_len;
+	    }
+	}
+    }
+
+  return REG_NOERROR;
+}
+
+/* Helper functions for re_string_allocate, and re_string_construct.  This one resizes PSTR's buffers to NEW_BUF_LEN elements.  */
+
+static reg_errcode_t
+internal_function
+re_string_realloc_buffers (re_string_t *pstr, int new_buf_len)
+{
+#ifdef RE_ENABLE_I18N
+  if (pstr->mb_cur_max > 1)	/* WCS (and OFFSETS) are only maintained when mb_cur_max > 1 */
+    {
+      wint_t *new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);	/* NOTE(review): re_realloc multiplies by sizeof without an overflow check */
+      if (BE (new_wcs == NULL, 0))
+	return REG_ESPACE;	/* realloc failed: pstr->wcs still points to the old block */
+      pstr->wcs = new_wcs;
+      if (pstr->offsets != NULL)	/* OFFSETS is resized only if it already exists */
+	{
+	  int *new_offsets = re_realloc (pstr->offsets, int, new_buf_len);
+	  if (BE (new_offsets == NULL, 0))
+	    return REG_ESPACE;	/* WCS was already resized, but bufs_len is left at its old value */
+	  pstr->offsets = new_offsets;
+	}
+    }
+#endif /* RE_ENABLE_I18N  */
+  if (pstr->mbs_allocated)	/* MBS is resized only when mbs_allocated is set */
+    {
+      unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char,
+					   new_buf_len);
+      if (BE (new_mbs == NULL, 0))
+	return REG_ESPACE;
+      pstr->mbs = new_mbs;
+    }
+  pstr->bufs_len = new_buf_len;	/* recorded only after every resize succeeded */
+  return REG_NOERROR;
+}
+
+
+static void
+internal_function
+re_string_construct_common (const char *str, int len, re_string_t *pstr,
+			    RE_TRANSLATE_TYPE trans, int icase,
+			    const re_dfa_t *dfa)
+{
+  /* Raw input and its extent; initially STOP coincides with the length.  */
+  pstr->raw_mbs = (const unsigned char *) str;
+  pstr->raw_len = pstr->len = len;
+  pstr->raw_stop = pstr->stop = len;
+  /* MBS is flagged as a private buffer when translating or ignoring case.  */
+  pstr->trans = trans;
+  pstr->icase = (icase != 0);
+  pstr->mbs_allocated = (icase || trans != NULL);
+  pstr->mb_cur_max = dfa->mb_cur_max;
+  pstr->is_utf8 = dfa->is_utf8;
+  pstr->map_notascii = dfa->map_notascii;
+}
+
+#ifdef RE_ENABLE_I18N
+
+/* Build wide character buffer PSTR->WCS.
+   If the byte sequence of the string are:
+     <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
+   Then wide character buffer will be:
+     <wc1>   , WEOF    , <wc2>   , WEOF    , <wc3>
+   We use WEOF for padding, they indicate that the position isn't
+   a first byte of a multibyte character.
+
+   Note that this function assumes PSTR->VALID_LEN elements are already
+   built and starts from PSTR->VALID_LEN.
+
+   An incomplete multibyte sequence stops the conversion only while the
+   buffer is shorter than the string, i.e. while more bytes can still be
+   made available.  Once the buffer spans the whole string the trailing
+   bytes are converted one at a time, so PSTR->VALID_LEN can always
+   reach PSTR->LEN.  */
+
+static void
+internal_function
+build_wcs_buffer (re_string_t *pstr)
+{
+#ifdef _LIBC
+  unsigned char buf[MB_LEN_MAX];
+  assert (MB_LEN_MAX >= pstr->mb_cur_max);
+#else
+  unsigned char buf[64];
+#endif
+  mbstate_t prev_st;
+  int byte_idx, end_idx, remain_len;
+  size_t mbclen;
+
+  /* Build the buffers from pstr->valid_len to either pstr->len or
+     pstr->bufs_len.  */
+  end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
+  for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
+    {
+      wchar_t wc;
+      const char *p;
+
+      remain_len = end_idx - byte_idx;
+      /* Remember the shift state so a failed conversion can be undone.  */
+      prev_st = pstr->cur_state;
+      /* Apply the translation if we need.  */
+      if (BE (pstr->trans != NULL, 0))
+	{
+	  int i, ch;
+
+	  for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
+	    {
+	      ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
+	      buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];
+	    }
+	  p = (const char *) buf;
+	}
+      else
+	p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
+      mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
+      if (BE (mbclen == (size_t) -1 || mbclen == 0
+	      || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len), 0))
+	{
+	  /* An invalid sequence, a NUL, or an incomplete sequence at the
+	     very end of the string: treat it as a singlebyte character.  */
+	  mbclen = 1;
+	  wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
+	  if (BE (pstr->trans != NULL, 0))
+	    wc = pstr->trans[wc];
+	  pstr->cur_state = prev_st;
+	}
+      else if (BE (mbclen == (size_t) -2, 0))
+	{
+	  /* The buffer doesn't have enough space, finish to build.  */
+	  pstr->cur_state = prev_st;
+	  break;
+	}
+
+      /* Write wide character and padding.  */
+      pstr->wcs[byte_idx++] = wc;
+      /* Write paddings.  */
+      for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
+	pstr->wcs[byte_idx++] = WEOF;
+    }
+  pstr->valid_len = byte_idx;
+  pstr->valid_raw_len = byte_idx;
+}
+
+/* Build wide character buffer PSTR->WCS like build_wcs_buffer,
+   but for REG_ICASE: lower case characters are stored upper-cased in
+   both PSTR->MBS and PSTR->WCS.  When an upper-cased character has a
+   different byte length than the original, PSTR->OFFSETS is created to
+   map buffer positions back to raw input positions.
+
+   Return REG_ESPACE if the offsets table cannot be allocated,
+   REG_NOERROR otherwise.
+
+   As in build_wcs_buffer, an incomplete multibyte sequence at the very
+   end of the string (buffer already spans the whole string) is consumed
+   byte by byte instead of stopping the conversion forever.  */
+
+static reg_errcode_t
+internal_function
+build_wcs_upper_buffer (re_string_t *pstr)
+{
+  mbstate_t prev_st;
+  int src_idx, byte_idx, end_idx, remain_len;
+  size_t mbclen;
+#ifdef _LIBC
+  char buf[MB_LEN_MAX];
+  assert (MB_LEN_MAX >= pstr->mb_cur_max);
+#else
+  char buf[64];
+#endif
+
+  byte_idx = pstr->valid_len;
+  end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
+
+  /* The following optimization assumes that ASCII characters can be
+     mapped to wide characters with a simple cast.  */
+  if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
+    {
+      while (byte_idx < end_idx)
+	{
+	  wchar_t wc;
+
+	  if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
+	      && mbsinit (&pstr->cur_state))
+	    {
+	      /* In case of a singlebyte character.  */
+	      pstr->mbs[byte_idx]
+		= toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
+	      /* The next step uses the assumption that wchar_t is encoded
+		 ASCII-safe: all ASCII values can be converted like this.  */
+	      pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
+	      ++byte_idx;
+	      continue;
+	    }
+
+	  remain_len = end_idx - byte_idx;
+	  prev_st = pstr->cur_state;
+	  mbclen = mbrtowc (&wc,
+			    ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
+			     + byte_idx), remain_len, &pstr->cur_state);
+	  /* True for a real length, i.e. neither 0, -1 nor -2.  */
+	  if (BE (mbclen + 2 > 2, 1))
+	    {
+	      wchar_t wcu = wc;
+	      if (iswlower (wc))
+		{
+		  size_t mbcdlen;
+
+		  wcu = towupper (wc);
+		  mbcdlen = wcrtomb (buf, wcu, &prev_st);
+		  if (BE (mbclen == mbcdlen, 1))
+		    memcpy (pstr->mbs + byte_idx, buf, mbclen);
+		  else
+		    {
+		      /* Length changed: continue in the general loop,
+			 which maintains the offsets table.  */
+		      src_idx = byte_idx;
+		      goto offsets_needed;
+		    }
+		}
+	      else
+		memcpy (pstr->mbs + byte_idx,
+			pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
+	      pstr->wcs[byte_idx++] = wcu;
+	      /* Write paddings.  */
+	      for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
+		pstr->wcs[byte_idx++] = WEOF;
+	    }
+	  else if (mbclen == (size_t) -1 || mbclen == 0
+		   || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len))
+	    {
+	      /* It is an invalid character, an incomplete character at
+		 the end of the string, or '\0'.  Just use the byte.  */
+	      int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
+	      pstr->mbs[byte_idx] = ch;
+	      /* And also cast it to wide char.  */
+	      pstr->wcs[byte_idx++] = (wchar_t) ch;
+	      /* The byte was taken as is, so discard whatever the failed
+		 conversion left in the shift state.  */
+	      if (BE (mbclen != 0, 0))
+		pstr->cur_state = prev_st;
+	    }
+	  else
+	    {
+	      /* The buffer doesn't have enough space, finish to build.  */
+	      pstr->cur_state = prev_st;
+	      break;
+	    }
+	}
+      pstr->valid_len = byte_idx;
+      pstr->valid_raw_len = byte_idx;
+      return REG_NOERROR;
+    }
+  else
+    for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
+      {
+	wchar_t wc;
+	const char *p;
+      offsets_needed:
+	remain_len = end_idx - byte_idx;
+	prev_st = pstr->cur_state;
+	if (BE (pstr->trans != NULL, 0))
+	  {
+	    int i, ch;
+
+	    for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
+	      {
+		ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
+		buf[i] = pstr->trans[ch];
+	      }
+	    p = (const char *) buf;
+	  }
+	else
+	  p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
+	mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
+	/* True for a real length, i.e. neither 0, -1 nor -2.  */
+	if (BE (mbclen + 2 > 2, 1))
+	  {
+	    wchar_t wcu = wc;
+	    if (iswlower (wc))
+	      {
+		size_t mbcdlen;
+
+		wcu = towupper (wc);
+		mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
+		if (BE (mbclen == mbcdlen, 1))
+		  memcpy (pstr->mbs + byte_idx, buf, mbclen);
+		else if (mbcdlen != (size_t) -1)
+		  {
+		    size_t i;
+
+		    /* The upper-cased form must fit in the buffer.  */
+		    if (byte_idx + mbcdlen > pstr->bufs_len)
+		      {
+			pstr->cur_state = prev_st;
+			break;
+		      }
+
+		    if (pstr->offsets == NULL)
+		      {
+			pstr->offsets = re_malloc (int, pstr->bufs_len);
+
+			if (pstr->offsets == NULL)
+			  return REG_ESPACE;
+		      }
+		    if (!pstr->offsets_needed)
+		      {
+			/* Everything built so far maps one to one.  */
+			for (i = 0; i < (size_t) byte_idx; ++i)
+			  pstr->offsets[i] = i;
+			pstr->offsets_needed = 1;
+		      }
+
+		    memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
+		    pstr->wcs[byte_idx] = wcu;
+		    pstr->offsets[byte_idx] = src_idx;
+		    for (i = 1; i < mbcdlen; ++i)
+		      {
+			pstr->offsets[byte_idx + i]
+			  = src_idx + (i < mbclen ? i : mbclen - 1);
+			pstr->wcs[byte_idx + i] = WEOF;
+		      }
+		    /* The converted string grows or shrinks by the
+		       difference between the two encodings.  */
+		    pstr->len += mbcdlen - mbclen;
+		    if (pstr->raw_stop > src_idx)
+		      pstr->stop += mbcdlen - mbclen;
+		    end_idx = (pstr->bufs_len > pstr->len)
+			      ? pstr->len : pstr->bufs_len;
+		    byte_idx += mbcdlen;
+		    src_idx += mbclen;
+		    continue;
+		  }
+                else
+                  memcpy (pstr->mbs + byte_idx, p, mbclen);
+	      }
+	    else
+	      memcpy (pstr->mbs + byte_idx, p, mbclen);
+
+	    if (BE (pstr->offsets_needed != 0, 0))
+	      {
+		size_t i;
+		for (i = 0; i < mbclen; ++i)
+		  pstr->offsets[byte_idx + i] = src_idx + i;
+	      }
+	    src_idx += mbclen;
+
+	    pstr->wcs[byte_idx++] = wcu;
+	    /* Write paddings.  */
+	    for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
+	      pstr->wcs[byte_idx++] = WEOF;
+	  }
+	else if (mbclen == (size_t) -1 || mbclen == 0
+		 || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len))
+	  {
+	    /* It is an invalid character, an incomplete character at
+	       the end of the string, or '\0'.  Just use the byte.  */
+	    int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
+
+	    if (BE (pstr->trans != NULL, 0))
+	      ch = pstr->trans [ch];
+	    pstr->mbs[byte_idx] = ch;
+
+	    if (BE (pstr->offsets_needed != 0, 0))
+	      pstr->offsets[byte_idx] = src_idx;
+	    ++src_idx;
+
+	    /* And also cast it to wide char.  */
+	    pstr->wcs[byte_idx++] = (wchar_t) ch;
+	    /* The byte was taken as is, so discard whatever the failed
+	       conversion left in the shift state.  */
+	    if (BE (mbclen != 0, 0))
+	      pstr->cur_state = prev_st;
+	  }
+	else
+	  {
+	    /* The buffer doesn't have enough space, finish to build.  */
+	    pstr->cur_state = prev_st;
+	    break;
+	  }
+      }
+  pstr->valid_len = byte_idx;
+  pstr->valid_raw_len = src_idx;
+  return REG_NOERROR;
+}
+
+/* Skip whole characters, starting right after the part of the raw input
+   that is already converted, until the raw index is no longer less than
+   NEW_RAW_IDX.  Return that raw index, and store in *LAST_WC the last
+   character skipped (WEOF if nothing was skipped).  */
+
+static int
+internal_function
+re_string_skip_chars (re_string_t *pstr, int new_raw_idx, wint_t *last_wc)
+{
+  mbstate_t prev_st;
+  int rawbuf_idx;
+  size_t mbclen;
+  /* Kept as wint_t so that WEOF is representable even where wchar_t
+     is a narrower type.  */
+  wint_t wc = WEOF;
+
+  /* Skip the characters which are not necessary to check.  */
+  for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
+       rawbuf_idx < new_raw_idx;)
+    {
+      wchar_t wc2;
+      /* RAWBUF_IDX indexes the raw input from its start, so the bytes
+	 left must be measured against the raw length; PSTR->len is
+	 relative to the current window.  */
+      int remain_len = pstr->raw_len - rawbuf_idx;
+      prev_st = pstr->cur_state;
+      mbclen = mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx,
+			remain_len, &pstr->cur_state);
+      if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
+	{
+	  /* We treat these cases as a single byte character.  */
+	  if (mbclen == 0 || remain_len == 0)
+	    wc = L'\0';
+	  else
+	    wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx);
+	  mbclen = 1;
+	  pstr->cur_state = prev_st;
+	}
+      else
+	wc = wc2;
+      /* Then proceed the next character.  */
+      rawbuf_idx += mbclen;
+    }
+  *last_wc = wc;
+  return rawbuf_idx;
+}
+#endif /* RE_ENABLE_I18N  */
+
+/* Single-byte REG_ICASE variant of the buffer builders: copy the raw
+   bytes from PSTR->valid_len up to the end of the buffer or string,
+   whichever comes first, into PSTR->MBS, translating them through
+   PSTR->trans when set and upper-casing lower case letters.  */
+
+static void
+internal_function
+build_upper_buffer (re_string_t *pstr)
+{
+  const int limit = (pstr->len < pstr->bufs_len) ? pstr->len : pstr->bufs_len;
+  int pos = pstr->valid_len;
+
+  while (pos < limit)
+    {
+      int byte = pstr->raw_mbs[pstr->raw_mbs_idx + pos];
+      if (BE (pstr->trans != NULL, 0))
+	byte = pstr->trans[byte];
+      pstr->mbs[pos] = islower (byte) ? toupper (byte) : byte;
+      ++pos;
+    }
+
+  /* One output byte per input byte, so both counters agree.  */
+  pstr->valid_raw_len = pos;
+  pstr->valid_len = pos;
+}
+
+/* Fill PSTR->MBS from PSTR->valid_len up to the end of the buffer or
+   string, whichever comes first, with the raw bytes mapped through
+   PSTR->trans.  */
+
+static void
+internal_function
+re_string_translate_buffer (re_string_t *pstr)
+{
+  const int limit = (pstr->len < pstr->bufs_len) ? pstr->len : pstr->bufs_len;
+  int pos = pstr->valid_len;
+
+  while (pos < limit)
+    {
+      const int raw_byte = pstr->raw_mbs[pstr->raw_mbs_idx + pos];
+      pstr->mbs[pos] = pstr->trans[raw_byte];
+      ++pos;
+    }
+
+  pstr->valid_raw_len = pos;
+  pstr->valid_len = pos;
+}
+
+/* This function re-constructs the buffers so that they start at raw
+   index IDX of the input.
+   Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
+   convert to upper case in case of REG_ICASE, apply translation.
+   Already converted characters at or after IDX are kept when possible;
+   PSTR->tip_context is set to the context of the character preceding
+   IDX, using EFLAGS (REG_NOTBOL) at the start of the input.
+   Return REG_NOERROR, or the error reported while rebuilding.  */
+
+static reg_errcode_t
+internal_function
+re_string_reconstruct (re_string_t *pstr, int idx, int eflags)
+{
+  int offset = idx - pstr->raw_mbs_idx;
+  if (BE (offset < 0, 0))
+    {
+      /* Moving backwards: reset buffer and start over from index 0.  */
+#ifdef RE_ENABLE_I18N
+      if (pstr->mb_cur_max > 1)
+	memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
+#endif /* RE_ENABLE_I18N */
+      pstr->len = pstr->raw_len;
+      pstr->stop = pstr->raw_stop;
+      pstr->valid_len = 0;
+      pstr->raw_mbs_idx = 0;
+      pstr->valid_raw_len = 0;
+      pstr->offsets_needed = 0;
+      pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
+			   : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
+      if (!pstr->mbs_allocated)
+	pstr->mbs = (unsigned char *) pstr->raw_mbs;
+      offset = idx;
+    }
+
+  if (BE (offset != 0, 1))
+    {
+      /* Should the already checked characters be kept?  */
+      if (BE (offset < pstr->valid_raw_len, 1))
+	{
+	  /* Yes, move them to the front of the buffer.  */
+#ifdef RE_ENABLE_I18N
+	  if (BE (pstr->offsets_needed, 0))
+	    {
+	      /* Binary search the buffer position whose raw offset is
+		 OFFSET (or the first one past it).  */
+	      int low = 0, high = pstr->valid_len, mid;
+	      do
+		{
+		  mid = (high + low) / 2;
+		  if (pstr->offsets[mid] > offset)
+		    high = mid;
+		  else if (pstr->offsets[mid] < offset)
+		    low = mid + 1;
+		  else
+		    break;
+		}
+	      while (low < high);
+	      if (pstr->offsets[mid] < offset)
+		++mid;
+	      pstr->tip_context = re_string_context_at (pstr, mid - 1,
+							eflags);
+	      /* This can be quite complicated, so handle specially
+		 only the common and easy case where the character with
+		 different length representation of lower and upper
+		 case is present at or after offset.  */
+	      if (pstr->valid_len > offset
+		  && mid == offset && pstr->offsets[mid] == offset)
+		{
+		  memmove (pstr->wcs, pstr->wcs + offset,
+			   (pstr->valid_len - offset) * sizeof (wint_t));
+		  memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset);
+		  pstr->valid_len -= offset;
+		  pstr->valid_raw_len -= offset;
+		  for (low = 0; low < pstr->valid_len; low++)
+		    pstr->offsets[low] = pstr->offsets[low + offset] - offset;
+		}
+	      else
+		{
+		  /* Otherwise, just find out how long the partial multibyte
+		     character at offset is and fill it with WEOF/255.  */
+		  pstr->len = pstr->raw_len - idx + offset;
+		  pstr->stop = pstr->raw_stop - idx + offset;
+		  pstr->offsets_needed = 0;
+		  while (mid > 0 && pstr->offsets[mid - 1] == offset)
+		    --mid;
+		  while (mid < pstr->valid_len)
+		    if (pstr->wcs[mid] != WEOF)
+		      break;
+		    else
+		      ++mid;
+		  if (mid == pstr->valid_len)
+		    pstr->valid_len = 0;
+		  else
+		    {
+		      pstr->valid_len = pstr->offsets[mid] - offset;
+		      if (pstr->valid_len)
+			{
+			  for (low = 0; low < pstr->valid_len; ++low)
+			    pstr->wcs[low] = WEOF;
+			  memset (pstr->mbs, 255, pstr->valid_len);
+			}
+		    }
+		  pstr->valid_raw_len = pstr->valid_len;
+		}
+	    }
+	  else
+#endif
+	    {
+	      pstr->tip_context = re_string_context_at (pstr, offset - 1,
+							eflags);
+#ifdef RE_ENABLE_I18N
+	      if (pstr->mb_cur_max > 1)
+		memmove (pstr->wcs, pstr->wcs + offset,
+			 (pstr->valid_len - offset) * sizeof (wint_t));
+#endif /* RE_ENABLE_I18N */
+	      if (BE (pstr->mbs_allocated, 0))
+		memmove (pstr->mbs, pstr->mbs + offset,
+			 pstr->valid_len - offset);
+	      pstr->valid_len -= offset;
+	      pstr->valid_raw_len -= offset;
+#if DEBUG
+	      assert (pstr->valid_len > 0);
+#endif
+	    }
+	}
+      else
+	{
+	  /* No, skip all characters until IDX.  */
+	  int prev_valid_len = pstr->valid_len;
+
+#ifdef RE_ENABLE_I18N
+	  if (BE (pstr->offsets_needed, 0))
+	    {
+	      pstr->len = pstr->raw_len - idx + offset;
+	      pstr->stop = pstr->raw_stop - idx + offset;
+	      pstr->offsets_needed = 0;
+	    }
+#endif
+	  pstr->valid_len = 0;
+#ifdef RE_ENABLE_I18N
+	  if (pstr->mb_cur_max > 1)
+	    {
+	      int wcs_idx;
+	      wint_t wc = WEOF;
+
+	      if (pstr->is_utf8)
+		{
+		  const unsigned char *raw, *p, *q, *end;
+
+		  /* Special case UTF-8.  Multi-byte chars start with any
+		     byte other than 0x80 - 0xbf.  */
+		  raw = pstr->raw_mbs + pstr->raw_mbs_idx;
+		  end = raw + (offset - pstr->mb_cur_max);
+		  if (end < pstr->raw_mbs)
+		    end = pstr->raw_mbs;
+		  p = raw + offset - 1;
+#ifdef _LIBC
+		  /* We know the wchar_t encoding is UCS4, so for the simple
+		     case, ASCII characters, skip the conversion step.  */
+		  if (isascii (*p) && BE (pstr->trans == NULL, 1))
+		    {
+		      memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
+		      /* pstr->valid_len = 0; */
+		      wc = (wchar_t) *p;
+		    }
+		  else
+#endif
+		    for (; p >= end; --p)
+		      if ((*p & 0xc0) != 0x80)
+			{
+			  mbstate_t cur_state;
+			  wchar_t wc2;
+			  int mlen = raw + pstr->len - p;
+			  unsigned char buf[6];
+			  size_t mbclen;
+
+			  q = p;
+			  if (BE (pstr->trans != NULL, 0))
+			    {
+			      int i = mlen < 6 ? mlen : 6;
+			      while (--i >= 0)
+				buf[i] = pstr->trans[p[i]];
+			      q = buf;
+			    }
+			  /* XXX Don't use mbrtowc, we know which conversion
+			     to use (UTF-8 -> UCS4).  */
+			  memset (&cur_state, 0, sizeof (cur_state));
+			  /* Convert Q, i.e. the translated bytes when a
+			     translation table is in use, not the raw
+			     bytes at P.  */
+			  mbclen = mbrtowc (&wc2, (const char *) q, mlen,
+					    &cur_state);
+			  if (raw + offset - p <= mbclen
+			      && mbclen < (size_t) -2)
+			    {
+			      memset (&pstr->cur_state, '\0',
+				      sizeof (mbstate_t));
+			      pstr->valid_len = mbclen - (raw + offset - p);
+			      wc = wc2;
+			    }
+			  break;
+			}
+		}
+
+	      /* Fall back to converting forward from the last known
+		 position when the shortcut above found nothing.  */
+	      if (wc == WEOF)
+		pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
+	      if (wc == WEOF)
+		pstr->tip_context
+		  = re_string_context_at (pstr, prev_valid_len - 1, eflags);
+	      else
+		pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
+				      && IS_WIDE_WORD_CHAR (wc))
+				     ? CONTEXT_WORD
+				     : ((IS_WIDE_NEWLINE (wc)
+					 && pstr->newline_anchor)
+					? CONTEXT_NEWLINE : 0));
+	      /* IDX falls inside a multibyte character: mark its remaining
+		 bytes as not being the start of any character.  */
+	      if (BE (pstr->valid_len, 0))
+		{
+		  for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
+		    pstr->wcs[wcs_idx] = WEOF;
+		  if (pstr->mbs_allocated)
+		    memset (pstr->mbs, 255, pstr->valid_len);
+		}
+	      pstr->valid_raw_len = pstr->valid_len;
+	    }
+	  else
+#endif /* RE_ENABLE_I18N */
+	    {
+	      int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
+	      pstr->valid_raw_len = 0;
+	      if (pstr->trans)
+		c = pstr->trans[c];
+	      pstr->tip_context = (bitset_contain (pstr->word_char, c)
+				   ? CONTEXT_WORD
+				   : ((IS_NEWLINE (c) && pstr->newline_anchor)
+				      ? CONTEXT_NEWLINE : 0));
+	    }
+	}
+      /* Without a private buffer, mbs simply points into the raw input.  */
+      if (!BE (pstr->mbs_allocated, 0))
+	pstr->mbs += offset;
+    }
+  pstr->raw_mbs_idx = idx;
+  pstr->len -= offset;
+  pstr->stop -= offset;
+
+  /* Then build the buffers.  */
+#ifdef RE_ENABLE_I18N
+  if (pstr->mb_cur_max > 1)
+    {
+      if (pstr->icase)
+	{
+	  reg_errcode_t ret = build_wcs_upper_buffer (pstr);
+	  if (BE (ret != REG_NOERROR, 0))
+	    return ret;
+	}
+      else
+	build_wcs_buffer (pstr);
+    }
+  else
+#endif /* RE_ENABLE_I18N */
+    if (BE (pstr->mbs_allocated, 0))
+      {
+	if (pstr->icase)
+	  build_upper_buffer (pstr);
+	else if (pstr->trans != NULL)
+	  re_string_translate_buffer (pstr);
+      }
+    else
+      pstr->valid_len = pstr->len;
+
+  pstr->cur_idx = 0;
+  return REG_NOERROR;
+}
+
+/* Return the byte IDX positions after the current index of PSTR
+   without advancing.  When PSTR has a private (translated or
+   case-folded) buffer, a single-byte character is read from the raw
+   input instead, so that the caller sees its original form.  */
+static unsigned char
+internal_function __attribute ((pure))
+re_string_peek_byte_case (const re_string_t *pstr, int idx)
+{
+  const int pos = pstr->cur_idx + idx;
+
+  /* Handle the common (easiest) cases first.  */
+  if (BE (!pstr->mbs_allocated, 1))
+    return re_string_peek_byte (pstr, idx);
+
+#ifdef RE_ENABLE_I18N
+  if (pstr->mb_cur_max > 1
+      && ! re_string_is_single_byte_char (pstr, pos))
+    return re_string_peek_byte (pstr, idx);
+
+  if (pstr->offsets_needed)
+    {
+      const int raw_byte
+	= pstr->raw_mbs[pstr->raw_mbs_idx + pstr->offsets[pos]];
+
+      /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
+	 this function returns CAPITAL LETTER I instead of first byte of
+	 DOTLESS SMALL LETTER I.  The latter would confuse the parser,
+	 since peek_byte_case doesn't advance cur_idx in any way.  */
+      if (!isascii (raw_byte))
+	return re_string_peek_byte (pstr, idx);
+      return raw_byte;
+    }
+#endif
+
+  return pstr->raw_mbs[pstr->raw_mbs_idx + pos];
+}
+
+/* Return the byte at the current index of PSTR in its original
+   (untranslated, not case-folded) form and advance the current index
+   past it.
+   This function modifies PSTR->cur_idx, so it must not be declared
+   with the `pure' attribute: the compiler would be allowed to merge or
+   drop calls.
+   NOTE(review): if a prototype elsewhere still carries the attribute,
+   remove it there as well.  */
+static unsigned char
+internal_function
+re_string_fetch_byte_case (re_string_t *pstr)
+{
+  if (BE (!pstr->mbs_allocated, 1))
+    return re_string_fetch_byte (pstr);
+
+#ifdef RE_ENABLE_I18N
+  if (pstr->offsets_needed)
+    {
+      int off, ch;
+
+      /* For tr_TR.UTF-8 [[:islower:]] there is
+	 [[: CAPITAL LETTER I WITH DOT lower:]] in mbs.  Skip
+	 in that case the whole multi-byte character and return
+	 the original letter.  On the other side, with
+	 [[: DOTLESS SMALL LETTER I return [[:I, as doing
+	 anything else would complicate things too much.  */
+
+      if (!re_string_first_byte (pstr, pstr->cur_idx))
+	return re_string_fetch_byte (pstr);
+
+      off = pstr->offsets[pstr->cur_idx];
+      ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
+
+      if (! isascii (ch))
+	return re_string_fetch_byte (pstr);
+
+      /* An ASCII original whose converted form may span several bytes:
+	 consume the whole converted character.  */
+      re_string_skip_bytes (pstr,
+			    re_string_char_size_at (pstr, pstr->cur_idx));
+      return ch;
+    }
+#endif
+
+  return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
+}
+
+/* Release the buffers owned by PSTR.  The mbs buffer is freed only
+   when it was privately allocated.  */
+static void
+internal_function
+re_string_destruct (re_string_t *pstr)
+{
+  if (pstr->mbs_allocated)
+    re_free (pstr->mbs);
+#ifdef RE_ENABLE_I18N
+  re_free (pstr->offsets);
+  re_free (pstr->wcs);
+#endif /* RE_ENABLE_I18N  */
+}
+
+/* Return the context flags (CONTEXT_WORD, CONTEXT_NEWLINE, buffer
+   boundary flags, or 0) describing the character at IDX in INPUT.
+   EFLAGS supplies REG_NOTEOL for the position at the end of INPUT.  */
+
+static unsigned int
+internal_function
+re_string_context_at (const re_string_t *input, int idx, int eflags)
+{
+  /* Before the buffer: we can't know the character in input->mbs[-1]
+     here, so use the value remembered in input->tip_context.  */
+  if (BE (idx < 0, 0))
+    return input->tip_context;
+
+  if (BE (idx == input->len, 0))
+    {
+      if (eflags & REG_NOTEOL)
+	return CONTEXT_ENDBUF;
+      return CONTEXT_NEWLINE | CONTEXT_ENDBUF;
+    }
+
+#ifdef RE_ENABLE_I18N
+  if (input->mb_cur_max > 1)
+    {
+      wint_t wc;
+      int wc_idx;
+
+      /* Step back over padding to the first byte of the character.  */
+      for (wc_idx = idx; input->wcs[wc_idx] == WEOF; )
+	{
+#ifdef DEBUG
+	  /* It must not happen.  */
+	  assert (wc_idx >= 0);
+#endif
+	  if (--wc_idx < 0)
+	    return input->tip_context;
+	}
+
+      wc = input->wcs[wc_idx];
+      if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc))
+	return CONTEXT_WORD;
+      if (IS_WIDE_NEWLINE (wc) && input->newline_anchor)
+	return CONTEXT_NEWLINE;
+      return 0;
+    }
+  else
+#endif
+    {
+      const int byte = re_string_byte_at (input, idx);
+
+      if (bitset_contain (input->word_char, byte))
+	return CONTEXT_WORD;
+      if (IS_NEWLINE (byte) && input->newline_anchor)
+	return CONTEXT_NEWLINE;
+      return 0;
+    }
+}
+
+/* Functions for set operation.  */
+
+/* Initialize SET as an empty set with room for SIZE elements.
+   Return REG_ESPACE if the storage cannot be allocated; SET is then
+   left with no capacity.  */
+static reg_errcode_t
+internal_function
+re_node_set_alloc (re_node_set *set, int size)
+{
+  set->nelem = 0;
+  set->alloc = size;
+  set->elems = re_malloc (int, size);
+  /* A null result for a zero-sized request is not a failure: malloc is
+     allowed to return NULL in that case.  */
+  if (BE (set->elems == NULL, 0) && size != 0)
+    {
+      /* Never advertise capacity that is not backed by storage.  */
+      set->alloc = 0;
+      return REG_ESPACE;
+    }
+  return REG_NOERROR;
+}
+
+/* Initialize SET as the one-element set { ELEM }.  On allocation
+   failure SET is left empty with no capacity and REG_ESPACE is
+   returned.  */
+static reg_errcode_t
+internal_function
+re_node_set_init_1 (re_node_set *set, int elem)
+{
+  int *storage = re_malloc (int, 1);
+
+  set->elems = storage;
+  if (BE (storage == NULL, 0))
+    {
+      set->nelem = 0;
+      set->alloc = 0;
+      return REG_ESPACE;
+    }
+
+  storage[0] = elem;
+  set->nelem = 1;
+  set->alloc = 1;
+  return REG_NOERROR;
+}
+
+/* Initialize SET as the set { ELEM1, ELEM2 }, stored in ascending
+   order; equal arguments yield a one-element set.  On allocation
+   failure SET is left empty with no capacity, like the other
+   initializers, and REG_ESPACE is returned.  */
+static reg_errcode_t
+internal_function
+re_node_set_init_2 (re_node_set *set, int elem1, int elem2)
+{
+  set->alloc = 2;
+  set->elems = re_malloc (int, 2);
+  if (BE (set->elems == NULL, 0))
+    {
+      /* Do not leave a capacity of 2 and an unset count behind.  */
+      set->alloc = set->nelem = 0;
+      return REG_ESPACE;
+    }
+  if (elem1 == elem2)
+    {
+      set->nelem = 1;
+      set->elems[0] = elem1;
+    }
+  else
+    {
+      set->nelem = 2;
+      if (elem1 < elem2)
+	{
+	  set->elems[0] = elem1;
+	  set->elems[1] = elem2;
+	}
+      else
+	{
+	  set->elems[0] = elem2;
+	  set->elems[1] = elem1;
+	}
+    }
+  return REG_NOERROR;
+}
+
+/* Initialize DEST as a copy of SRC, with capacity for exactly the
+   elements of SRC.  An empty SRC yields an empty DEST without any
+   allocation.  On allocation failure DEST is left empty with no
+   capacity and REG_ESPACE is returned.  */
+static reg_errcode_t
+internal_function
+re_node_set_init_copy (re_node_set *dest, const re_node_set *src)
+{
+  if (src->nelem <= 0)
+    {
+      re_node_set_init_empty (dest);
+      return REG_NOERROR;
+    }
+
+  dest->nelem = src->nelem;
+  dest->alloc = src->nelem;
+  dest->elems = re_malloc (int, dest->alloc);
+  if (BE (dest->elems == NULL, 0))
+    {
+      dest->nelem = 0;
+      dest->alloc = 0;
+      return REG_ESPACE;
+    }
+
+  memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
+  return REG_NOERROR;
+}
+
+/* Calculate the intersection of the sets SRC1 and SRC2, and merge it
+   into DEST.  Return REG_NOERROR on success, or REG_ESPACE if DEST
+   could not be enlarged (DEST is then unchanged).
+   The merge below walks all three arrays from their highest index down
+   and compares neighbouring values, so it presumably relies on every
+   set being sorted in ascending order without duplicates -- confirm
+   against the other set routines.
+   Note: We assume dest->elems is NULL, when dest->alloc is 0.  */
+
+static reg_errcode_t
+internal_function
+re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1,
+			   const re_node_set *src2)
+{
+  int i1, i2, is, id, delta, sbase;
+  if (src1->nelem == 0 || src2->nelem == 0)
+    return REG_NOERROR;
+
+  /* We need dest->nelem + 2 * elems_in_intersection; this is a
+     conservative estimate.  The extra room above dest->nelem is used as
+     scratch space for the new elements before they are merged in.  */
+  if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
+    {
+      int new_alloc = src1->nelem + src2->nelem + dest->alloc;
+      int *new_elems = re_realloc (dest->elems, int, new_alloc);
+      if (BE (new_elems == NULL, 0))
+        return REG_ESPACE;
+      dest->elems = new_elems;
+      dest->alloc = new_alloc;
+    }
+
+  /* Find the items in the intersection of SRC1 and SRC2, and copy
+     into the top of DEST those that are not already in DEST itself.
+     SBASE moves down as items are stored, so the scratch area ends up
+     holding them in ascending order starting at SBASE.  */
+  sbase = dest->nelem + src1->nelem + src2->nelem;
+  i1 = src1->nelem - 1;
+  i2 = src2->nelem - 1;
+  id = dest->nelem - 1;
+  for (;;)
+    {
+      if (src1->elems[i1] == src2->elems[i2])
+	{
+	  /* Try to find the item in DEST.  Maybe we could binary search?  */
+	  while (id >= 0 && dest->elems[id] > src1->elems[i1])
+	    --id;
+
+          if (id < 0 || dest->elems[id] != src1->elems[i1])
+            dest->elems[--sbase] = src1->elems[i1];
+
+	  if (--i1 < 0 || --i2 < 0)
+	    break;
+	}
+
+      /* Lower the highest of the two items.  */
+      else if (src1->elems[i1] < src2->elems[i2])
+	{
+	  if (--i2 < 0)
+	    break;
+	}
+      else
+	{
+	  if (--i1 < 0)
+	    break;
+	}
+    }
+
+  id = dest->nelem - 1;
+  is = dest->nelem + src1->nelem + src2->nelem - 1;
+  delta = is - sbase + 1; /* Number of new items held in the scratch area.  */
+
+  /* Now copy.  When DELTA becomes zero, the remaining
+     DEST elements are already in place; this is more or
+     less the same loop that is in re_node_set_merge.  */
+  dest->nelem += delta;
+  if (delta > 0 && id >= 0)
+    for (;;)
+      {
+        if (dest->elems[is] > dest->elems[id])
+          {
+            /* Copy from the top.  */
+            dest->elems[id + delta--] = dest->elems[is--];
+            if (delta == 0)
+              break;
+          }
+        else
+          {
+            /* Slide from the bottom.  */
+            dest->elems[id + delta] = dest->elems[id];
+            if (--id < 0)
+              break;
+          }
+      }
+
+  /* Copy remaining SRC elements.  DELTA is zero here unless the old
+     DEST elements ran out first (or DEST was empty), in which case the
+     DELTA smallest new items go to the front.  */
+  memcpy (dest->elems, dest->elems + sbase, delta * sizeof (int));
+
+  return REG_NOERROR;
+}
+
+/* Initialize DEST with the union of SRC1 and SRC2.  A NULL or empty
+   source counts as the empty set; when at most one source has elements
+   DEST becomes a copy of it (or the empty set).  Return REG_NOERROR on
+   success, or the error code of the failed allocation.  */
+
+static reg_errcode_t
+internal_function
+re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
+			const re_node_set *src2)
+{
+  const int have1 = (src1 != NULL && src1->nelem > 0);
+  const int have2 = (src2 != NULL && src2->nelem > 0);
+  int pos1 = 0, pos2 = 0, out = 0;
+
+  /* Nothing to merge unless both sources contribute elements.  */
+  if (!have1 || !have2)
+    {
+      if (have1)
+	return re_node_set_init_copy (dest, src1);
+      if (have2)
+	return re_node_set_init_copy (dest, src2);
+      re_node_set_init_empty (dest);
+      return REG_NOERROR;
+    }
+
+  dest->alloc = src1->nelem + src2->nelem;
+  dest->elems = re_malloc (int, dest->alloc);
+  if (BE (dest->elems == NULL, 0))
+    return REG_ESPACE;
+
+  /* Merge while both sources have elements left, always emitting the
+     smaller head and emitting a value present in both only once.  */
+  while (pos1 < src1->nelem && pos2 < src2->nelem)
+    {
+      const int head1 = src1->elems[pos1];
+      const int head2 = src2->elems[pos2];
+
+      if (head1 > head2)
+	{
+	  dest->elems[out++] = head2;
+	  ++pos2;
+	}
+      else
+	{
+	  if (head1 == head2)
+	    ++pos2;
+	  dest->elems[out++] = head1;
+	  ++pos1;
+	}
+    }
+
+  /* Append whatever is left of the source that was not exhausted.  */
+  if (pos1 < src1->nelem)
+    {
+      const int rest = src1->nelem - pos1;
+      memcpy (dest->elems + out, src1->elems + pos1, rest * sizeof (int));
+      out += rest;
+    }
+  else if (pos2 < src2->nelem)
+    {
+      const int rest = src2->nelem - pos2;
+      memcpy (dest->elems + out, src2->elems + pos2, rest * sizeof (int));
+      out += rest;
+    }
+
+  dest->nelem = out;
+  return REG_NOERROR;
+}
+
+/* Calculate the union set of the sets DEST and SRC, and store it in
+   DEST.  Return REG_NOERROR on success, or REG_ESPACE if DEST could not
+   be enlarged (DEST is then unchanged).  A NULL or empty SRC leaves
+   DEST untouched.
+   The merge walks both arrays from their highest index down and
+   compares neighbouring values, so it presumably relies on both sets
+   being sorted in ascending order without duplicates -- confirm
+   against the other set routines.  */
+
+static reg_errcode_t
+internal_function
+re_node_set_merge (re_node_set *dest, const re_node_set *src)
+{
+  int is, id, sbase, delta;
+  if (src == NULL || src->nelem == 0)
+    return REG_NOERROR;
+  if (dest->alloc < 2 * src->nelem + dest->nelem)
+    {
+      int new_alloc = 2 * (src->nelem + dest->alloc);
+      int *new_buffer = re_realloc (dest->elems, int, new_alloc);
+      if (BE (new_buffer == NULL, 0))
+	return REG_ESPACE;
+      dest->elems = new_buffer;
+      dest->alloc = new_alloc;
+    }
+
+  if (BE (dest->nelem == 0, 0))
+    {
+      dest->nelem = src->nelem;
+      memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
+      return REG_NOERROR;
+    }
+
+  /* Copy into the top of DEST the items of SRC that are not
+     found in DEST.  Maybe we could binary search in DEST?
+     The area above dest->nelem serves as scratch space; SBASE moves
+     down as items are stored, so they end up in ascending order
+     starting at SBASE.  */
+  for (sbase = dest->nelem + 2 * src->nelem,
+       is = src->nelem - 1, id = dest->nelem - 1; is >= 0 && id >= 0; )
+    {
+      if (dest->elems[id] == src->elems[is])
+        is--, id--;
+      else if (dest->elems[id] < src->elems[is])
+        dest->elems[--sbase] = src->elems[is--];
+      else /* if (dest->elems[id] > src->elems[is]) */
+        --id;
+    }
+
+  if (is >= 0)
+    {
+      /* If DEST is exhausted, the remaining items of SRC must be unique.  */
+      sbase -= is + 1;
+      memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (int));
+    }
+
+  id = dest->nelem - 1;
+  is = dest->nelem + 2 * src->nelem - 1;
+  delta = is - sbase + 1; /* Number of new items held in the scratch area.  */
+  if (delta == 0)
+    return REG_NOERROR;
+
+  /* Now copy.  When DELTA becomes zero, the remaining
+     DEST elements are already in place.  */
+  dest->nelem += delta;
+  for (;;)
+    {
+      if (dest->elems[is] > dest->elems[id])
+        {
+	  /* Copy from the top.  */
+          dest->elems[id + delta--] = dest->elems[is--];
+	  if (delta == 0)
+	    break;
+	}
+      else
+        {
+          /* Slide from the bottom.  */
+          dest->elems[id + delta] = dest->elems[id];
+	  if (--id < 0)
+	    {
+	      /* Copy remaining SRC elements: the old DEST elements ran
+		 out, so the DELTA smallest new items go to the front.  */
+	      memcpy (dest->elems, dest->elems + sbase,
+	              delta * sizeof (int));
+	      break;
+	    }
+	}
+    }
+
+  return REG_NOERROR;
+}
+
+/* Insert the new element ELEM into the re_node_set SET, keeping the
+   elements in ascending order.
+   SET should not already have ELEM.
+   Return -1 if an error occurred (SET is then unchanged), return 1
+   otherwise.  */
+
+static int
+internal_function
+re_node_set_insert (re_node_set *set, int elem)
+{
+  int idx;
+  /* In case the set has no storage yet.  */
+  if (set->alloc == 0)
+    {
+      if (BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1))
+	return 1;
+      else
+	return -1;
+    }
+
+  if (BE (set->nelem == 0, 0))
+    {
+      /* We already guaranteed above that set->alloc != 0.  */
+      set->elems[0] = elem;
+      ++set->nelem;
+      return 1;
+    }
+
+  /* Realloc if we need.  The recorded capacity is only updated once
+     the larger buffer really exists, so a failed realloc leaves SET
+     consistent.  */
+  if (set->alloc == set->nelem)
+    {
+      const int new_alloc = set->alloc * 2;
+      int *new_elems = re_realloc (set->elems, int, new_alloc);
+      if (BE (new_elems == NULL, 0))
+	return -1;
+      set->elems = new_elems;
+      set->alloc = new_alloc;
+    }
+
+  /* Move the elements which follows the new element.  Test the
+     first element separately to skip a check in the inner loop.  */
+  if (elem < set->elems[0])
+    {
+      for (idx = set->nelem; idx > 0; idx--)
+        set->elems[idx] = set->elems[idx - 1];
+    }
+  else
+    {
+      /* elems[0] <= ELEM, so this loop stops at index 1 at the latest.  */
+      for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
+        set->elems[idx] = set->elems[idx - 1];
+    }
+
+  /* Insert the new element.  */
+  set->elems[idx] = elem;
+  ++set->nelem;
+  return 1;
+}
+
+/* Append the new element ELEM to the re_node_set SET.
+   SET should not already have any element greater than or equal to ELEM.
+   Return -1 if an error occurred (SET is then unchanged), return 1
+   otherwise.  */
+
+static int
+internal_function
+re_node_set_insert_last (re_node_set *set, int elem)
+{
+  /* Realloc if we need.  The recorded capacity is only updated once
+     the larger buffer really exists, so a failed realloc leaves SET
+     consistent.  */
+  if (set->alloc == set->nelem)
+    {
+      const int new_alloc = (set->alloc + 1) * 2;
+      int *new_elems = re_realloc (set->elems, int, new_alloc);
+      if (BE (new_elems == NULL, 0))
+	return -1;
+      set->elems = new_elems;
+      set->alloc = new_alloc;
+    }
+
+  /* Insert the new element.  */
+  set->elems[set->nelem++] = elem;
+  return 1;
+}
+
+/* Compare two node sets SET1 and SET2 element by element.
+   Return 1 if both are non-NULL and hold the same elements in the same
+   positions, return 0 otherwise.  */
+
+static int
+internal_function __attribute ((pure))
+re_node_set_compare (const re_node_set *set1, const re_node_set *set2)
+{
+  int pos;
+
+  if (set1 == NULL || set2 == NULL)
+    return 0;
+  if (set1->nelem != set2->nelem)
+    return 0;
+
+  for (pos = 0; pos < set1->nelem; ++pos)
+    {
+      if (set1->elems[pos] != set2->elems[pos])
+	return 0;
+    }
+  return 1;
+}
+
+/* Look up ELEM in SET by binary search.
+   Return the position of ELEM plus one when it is present, and 0 when it
+   is absent or SET is empty.  */
+
+static int
+internal_function __attribute ((pure))
+re_node_set_contains (const re_node_set *set, int elem)
+{
+  unsigned int lo, hi;
+
+  if (set->nelem <= 0)
+    return 0;
+
+  /* Narrow [lo, hi] until a single candidate position remains.  */
+  lo = 0;
+  hi = set->nelem - 1;
+  while (lo < hi)
+    {
+      unsigned int probe = (lo + hi) / 2;
+      if (set->elems[probe] >= elem)
+        hi = probe;
+      else
+        lo = probe + 1;
+    }
+
+  if (set->elems[lo] != elem)
+    return 0;
+  return lo + 1;
+}
+
+static void
+internal_function
+re_node_set_remove_at (re_node_set *set, int idx)
+{
+  /* Delete the element at position IDX of SET by moving every later
+     element one slot down.  An IDX outside [0, nelem) is ignored.  */
+  int dst;
+
+  if (idx >= 0 && idx < set->nelem)
+    {
+      set->nelem -= 1;
+      for (dst = idx; dst < set->nelem; ++dst)
+        set->elems[dst] = set->elems[dst + 1];
+    }
+}
+
+
+/* Add the token TOKEN to dfa->nodes, and return the index of the token.
+   Return -1 if the node arrays are full and could not be enlarged.
+   When enlarging fails part-way, every array that was successfully
+   reallocated is still stored back into DFA, and dfa->nodes_alloc keeps
+   its old (smaller) value, so DFA only ever refers to live memory.  */
+
+static int
+internal_function
+re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
+{
+  int type = token.type;
+  if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
+    {
+      size_t new_nodes_alloc = dfa->nodes_alloc * 2;
+      int *new_nexts, *new_indices;
+      re_node_set *new_edests, *new_eclosures;
+      re_token_t *new_nodes;
+
+      /* Avoid overflows.  */
+      if (BE (new_nodes_alloc < dfa->nodes_alloc, 0))
+        return -1;
+
+      new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
+      if (BE (new_nodes == NULL, 0))
+        return -1;
+      dfa->nodes = new_nodes;
+
+      /* A successful realloc may release the old block, so each result
+         is committed right away; a failed one leaves the old block (and
+         the pointer in DFA) untouched.  */
+      new_nexts = re_realloc (dfa->nexts, int, new_nodes_alloc);
+      if (new_nexts != NULL)
+        dfa->nexts = new_nexts;
+      new_indices = re_realloc (dfa->org_indices, int, new_nodes_alloc);
+      if (new_indices != NULL)
+        dfa->org_indices = new_indices;
+      new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
+      if (new_edests != NULL)
+        dfa->edests = new_edests;
+      new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
+      if (new_eclosures != NULL)
+        dfa->eclosures = new_eclosures;
+
+      if (BE (new_nexts == NULL || new_indices == NULL
+              || new_edests == NULL || new_eclosures == NULL, 0))
+        return -1;
+      dfa->nodes_alloc = new_nodes_alloc;
+    }
+
+  /* Initialize the new node and its per-node bookkeeping slots.  */
+  dfa->nodes[dfa->nodes_len] = token;
+  dfa->nodes[dfa->nodes_len].constraint = 0;
+#ifdef RE_ENABLE_I18N
+  dfa->nodes[dfa->nodes_len].accept_mb =
+    (type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET;
+#endif
+  dfa->nexts[dfa->nodes_len] = -1;
+  re_node_set_init_empty (dfa->edests + dfa->nodes_len);
+  re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
+  return dfa->nodes_len++;
+}
+
+static inline unsigned int
+internal_function
+calc_state_hash (const re_node_set *nodes, unsigned int context)
+{
+  /* The hash is CONTEXT plus the element count plus the sum of all
+     elements of NODES, computed in unsigned arithmetic.  */
+  unsigned int sum = context;
+  int pos = 0;
+
+  sum += nodes->nelem;
+  while (pos < nodes->nelem)
+    {
+      sum += nodes->elems[pos];
+      ++pos;
+    }
+  return sum;
+}
+
+/* Look up the context-independent state whose node set equals NODES.
+   If such a state is already registered in DFA, return it; otherwise
+   create a new one with create_ci_newstate and return that.
+   An empty NODES yields NULL with *ERR set to REG_NOERROR, because NULL
+   stands for the invalid state.  If creating the state fails, NULL is
+   returned with *ERR set to REG_ESPACE.  *ERR is not written on the
+   other paths, and a non-NULL value is never returned together with an
+   error.  */
+
+static re_dfastate_t *
+internal_function
+re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa,
+                  const re_node_set *nodes)
+{
+  unsigned int hash;
+  struct re_state_table_entry *bucket;
+  re_dfastate_t *created;
+  int slot;
+
+  if (BE (nodes->nelem == 0, 0))
+    {
+      *err = REG_NOERROR;
+      return NULL;
+    }
+
+  hash = calc_state_hash (nodes, 0);
+  bucket = dfa->state_table + (hash & dfa->state_hash_mask);
+
+  /* Scan the bucket; the cheap hash comparison filters most entries.  */
+  for (slot = 0; slot < bucket->num; ++slot)
+    {
+      re_dfastate_t *candidate = bucket->array[slot];
+      if (candidate->hash == hash
+          && re_node_set_compare (&candidate->nodes, nodes))
+        return candidate;
+    }
+
+  /* No matching state is registered yet, so build one.  */
+  created = create_ci_newstate (dfa, nodes, hash);
+  if (BE (created == NULL, 0))
+    *err = REG_ESPACE;
+  return created;
+}
+
+/* Look up the state whose entrance node set equals NODES and whose
+   context equals CONTEXT.
+   If such a state is already registered in DFA, return it; otherwise
+   create a new one with create_cd_newstate and return that.
+   An empty NODES yields NULL with *ERR set to REG_NOERROR, because NULL
+   stands for the invalid state.  If creating the state fails, NULL is
+   returned with *ERR set to REG_ESPACE.  *ERR is not written on the
+   other paths, and a non-NULL value is never returned together with an
+   error.  */
+
+static re_dfastate_t *
+internal_function
+re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa,
+                          const re_node_set *nodes, unsigned int context)
+{
+  unsigned int hash;
+  struct re_state_table_entry *bucket;
+  re_dfastate_t *created;
+  int slot;
+
+  if (nodes->nelem == 0)
+    {
+      *err = REG_NOERROR;
+      return NULL;
+    }
+
+  hash = calc_state_hash (nodes, context);
+  bucket = dfa->state_table + (hash & dfa->state_hash_mask);
+
+  for (slot = 0; slot < bucket->num; ++slot)
+    {
+      re_dfastate_t *candidate = bucket->array[slot];
+      if (candidate->hash != hash || candidate->context != context)
+        continue;
+      if (re_node_set_compare (candidate->entrance_nodes, nodes))
+        return candidate;
+    }
+
+  /* No matching state is registered in DFA yet, so build one.  */
+  created = create_cd_newstate (dfa, nodes, context, hash);
+  if (BE (created == NULL, 0))
+    *err = REG_ESPACE;
+  return created;
+}
+
+/* Finish initialization of the new state NEWSTATE, and using its hash value
+   HASH put it in the appropriate bucket of DFA's state table.
+   NEWSTATE->non_eps_nodes is filled with the members of NEWSTATE->nodes
+   whose type is not an epsilon node.
+   Return REG_NOERROR on success and REG_ESPACE if any allocation fails.
+   NEWSTATE is not freed here on failure; that is left to the caller.  */
+
+static reg_errcode_t
+register_state (const re_dfa_t *dfa, re_dfastate_t *newstate,
+                unsigned int hash)
+{
+  struct re_state_table_entry *spot;
+  reg_errcode_t err;
+  int i;
+
+  newstate->hash = hash;
+  err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
+  if (BE (err != REG_NOERROR, 0))
+    return REG_ESPACE;
+  for (i = 0; i < newstate->nodes.nelem; i++)
+    {
+      int elem = newstate->nodes.elems[i];
+      /* re_node_set_insert_last reports failure with a negative value;
+         do not silently drop the node in that case.  */
+      if (!IS_EPSILON_NODE (dfa->nodes[elem].type)
+          && BE (re_node_set_insert_last (&newstate->non_eps_nodes, elem) < 0,
+                 0))
+        return REG_ESPACE;
+    }
+
+  /* Append NEWSTATE to its hash bucket, growing the bucket if needed.  */
+  spot = dfa->state_table + (hash & dfa->state_hash_mask);
+  if (BE (spot->alloc <= spot->num, 0))
+    {
+      int new_alloc = 2 * spot->num + 2;
+      re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *,
+                                              new_alloc);
+      if (BE (new_array == NULL, 0))
+        return REG_ESPACE;
+      spot->array = new_array;
+      spot->alloc = new_alloc;
+    }
+  spot->array[spot->num++] = newstate;
+  return REG_NOERROR;
+}
+
+static void
+free_state (re_dfastate_t *state)
+{
+  /* Release every allocation owned by STATE, then STATE itself.  The
+     entrance node set is freed separately only when it is not simply an
+     alias of STATE->nodes.  */
+  re_node_set *entrance = state->entrance_nodes;
+
+  re_node_set_free (&state->non_eps_nodes);
+  re_node_set_free (&state->inveclosure);
+  if (entrance != &state->nodes)
+    {
+      re_node_set_free (entrance);
+      re_free (entrance);
+    }
+  re_node_set_free (&state->nodes);
+  re_free (state->word_trtable);
+  re_free (state->trtable);
+  re_free (state);
+}
+
+/* Create a new state that is independent of contexts, holding a copy of
+   NODES, and register it in DFA under HASH.
+   Return the new state on success, or NULL if an allocation or the
+   registration fails.  */
+
+static re_dfastate_t *
+internal_function
+create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
+                    unsigned int hash)
+{
+  int pos;
+  re_dfastate_t *newstate;
+
+  newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
+  if (BE (newstate == NULL, 0))
+    return NULL;
+  if (BE (re_node_set_init_copy (&newstate->nodes, nodes) != REG_NOERROR, 0))
+    {
+      re_free (newstate);
+      return NULL;
+    }
+
+  newstate->entrance_nodes = &newstate->nodes;
+  for (pos = 0; pos < nodes->nelem; ++pos)
+    {
+      re_token_t *node = &dfa->nodes[nodes->elems[pos]];
+      re_token_type_t type = node->type;
+
+      /* Plain characters without a constraint set none of the flags.  */
+      if (type == CHARACTER && !node->constraint)
+        continue;
+#ifdef RE_ENABLE_I18N
+      newstate->accept_mb |= node->accept_mb;
+#endif /* RE_ENABLE_I18N */
+
+      if (type == END_OF_RE)
+        {
+          /* A state containing the halt node is a halt state.  */
+          newstate->halt = 1;
+        }
+      else if (type == OP_BACK_REF)
+        {
+          newstate->has_backref = 1;
+        }
+      else if (type == ANCHOR || node->constraint)
+        {
+          newstate->has_constraint = 1;
+        }
+    }
+
+  if (BE (register_state (dfa, newstate, hash) != REG_NOERROR, 0))
+    {
+      free_state (newstate);
+      return NULL;
+    }
+  return newstate;
+}
+
+/* Create a new state that depends on the context CONTEXT, and register
+   it in DFA under HASH.
+   The state starts with a copy of NODES.  As soon as a node carrying a
+   constraint is met, a separate copy of NODES is kept as the state's
+   entrance node set, and every node whose constraint is not satisfied by
+   CONTEXT is removed from the state's own node set.
+   Return the new state on success, or NULL if an allocation or the
+   registration fails.  */
+
+static re_dfastate_t *
+internal_function
+create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
+                    unsigned int context, unsigned int hash)
+{
+  int i, nctx_nodes = 0;
+  reg_errcode_t err;
+  re_dfastate_t *newstate;
+
+  newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
+  if (BE (newstate == NULL, 0))
+    return NULL;
+  err = re_node_set_init_copy (&newstate->nodes, nodes);
+  if (BE (err != REG_NOERROR, 0))
+    {
+      re_free (newstate);
+      return NULL;
+    }
+
+  newstate->context = context;
+  newstate->entrance_nodes = &newstate->nodes;
+
+  for (i = 0 ; i < nodes->nelem ; i++)
+    {
+      re_token_t *node = dfa->nodes + nodes->elems[i];
+      re_token_type_t type = node->type;
+      unsigned int constraint = node->constraint;
+
+      if (type == CHARACTER && !constraint)
+        continue;
+#ifdef RE_ENABLE_I18N
+      newstate->accept_mb |= node->accept_mb;
+#endif /* RE_ENABLE_I18N */
+
+      /* If the state has the halt node, the state is a halt state.  */
+      if (type == END_OF_RE)
+        newstate->halt = 1;
+      else if (type == OP_BACK_REF)
+        newstate->has_backref = 1;
+      else if (type == ANCHOR)
+        constraint = node->opr.ctx_type;
+
+      if (constraint)
+        {
+          if (newstate->entrance_nodes == &newstate->nodes)
+            {
+              /* Build the entrance set in a local first.  NEWSTATE keeps
+                 pointing at its own node set until the copy is complete,
+                 so free_state never sees a NULL or half-initialized
+                 entrance set on the error paths below.  */
+              re_node_set *entrance = re_malloc (re_node_set, 1);
+              if (BE (entrance == NULL, 0))
+                {
+                  free_state (newstate);
+                  return NULL;
+                }
+              err = re_node_set_init_copy (entrance, nodes);
+              if (BE (err != REG_NOERROR, 0))
+                {
+                  re_free (entrance);
+                  free_state (newstate);
+                  return NULL;
+                }
+              newstate->entrance_nodes = entrance;
+              nctx_nodes = 0;
+              newstate->has_constraint = 1;
+            }
+
+          if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
+            {
+              /* I indexes NODES; subtract the number of nodes already
+                 removed to get the position inside NEWSTATE->nodes.  */
+              re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
+              ++nctx_nodes;
+            }
+        }
+    }
+  err = register_state (dfa, newstate, hash);
+  if (BE (err != REG_NOERROR, 0))
+    {
+      free_state (newstate);
+      newstate = NULL;
+    }
+  return  newstate;
+}
+
+/******************************************************************************/
+/******************************************************************************/
+/******************************************************************************/
+/* GKINCLUDE #include "regcomp.c" */
+/******************************************************************************/
+/******************************************************************************/
+/******************************************************************************/
+/* Extended regular expression matching and search library.
+   Copyright (C) 2002,2003,2004,2005,2006 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern,
+					  size_t length, reg_syntax_t syntax);
+static void re_compile_fastmap_iter (regex_t *bufp,
+				     const re_dfastate_t *init_state,
+				     char *fastmap);
+static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len);
+#ifdef RE_ENABLE_I18N
+static void free_charset (re_charset_t *cset);
+#endif /* RE_ENABLE_I18N */
+static void free_workarea_compile (regex_t *preg);
+static reg_errcode_t create_initial_state (re_dfa_t *dfa);
+#ifdef RE_ENABLE_I18N
+static void optimize_utf8 (re_dfa_t *dfa);
+#endif
+static reg_errcode_t analyze (regex_t *preg);
+static reg_errcode_t preorder (bin_tree_t *root,
+			       reg_errcode_t (fn (void *, bin_tree_t *)),
+			       void *extra);
+static reg_errcode_t postorder (bin_tree_t *root,
+				reg_errcode_t (fn (void *, bin_tree_t *)),
+				void *extra);
+static reg_errcode_t optimize_subexps (void *extra, bin_tree_t *node);
+static reg_errcode_t lower_subexps (void *extra, bin_tree_t *node);
+static bin_tree_t *lower_subexp (reg_errcode_t *err, regex_t *preg,
+				 bin_tree_t *node);
+static reg_errcode_t calc_first (void *extra, bin_tree_t *node);
+static reg_errcode_t calc_next (void *extra, bin_tree_t *node);
+static reg_errcode_t link_nfa_nodes (void *extra, bin_tree_t *node);
+static int duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint);
+static int search_duplicated_node (const re_dfa_t *dfa, int org_node,
+				   unsigned int constraint);
+static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
+static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa,
+					 int node, int root);
+static reg_errcode_t calc_inveclosure (re_dfa_t *dfa);
+static int fetch_number (re_string_t *input, re_token_t *token,
+			 reg_syntax_t syntax);
+static int peek_token (re_token_t *token, re_string_t *input,
+			reg_syntax_t syntax) internal_function;
+static bin_tree_t *parse (re_string_t *regexp, regex_t *preg,
+			  reg_syntax_t syntax, reg_errcode_t *err);
+static bin_tree_t *parse_reg_exp (re_string_t *regexp, regex_t *preg,
+				  re_token_t *token, reg_syntax_t syntax,
+				  int nest, reg_errcode_t *err);
+static bin_tree_t *parse_branch (re_string_t *regexp, regex_t *preg,
+				 re_token_t *token, reg_syntax_t syntax,
+				 int nest, reg_errcode_t *err);
+static bin_tree_t *parse_expression (re_string_t *regexp, regex_t *preg,
+				     re_token_t *token, reg_syntax_t syntax,
+				     int nest, reg_errcode_t *err);
+static bin_tree_t *parse_sub_exp (re_string_t *regexp, regex_t *preg,
+				  re_token_t *token, reg_syntax_t syntax,
+				  int nest, reg_errcode_t *err);
+static bin_tree_t *parse_dup_op (bin_tree_t *dup_elem, re_string_t *regexp,
+				 re_dfa_t *dfa, re_token_t *token,
+				 reg_syntax_t syntax, reg_errcode_t *err);
+static bin_tree_t *parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa,
+				      re_token_t *token, reg_syntax_t syntax,
+				      reg_errcode_t *err);
+static reg_errcode_t parse_bracket_element (bracket_elem_t *elem,
+					    re_string_t *regexp,
+					    re_token_t *token, int token_len,
+					    re_dfa_t *dfa,
+					    reg_syntax_t syntax,
+					    int accept_hyphen);
+static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem,
+					  re_string_t *regexp,
+					  re_token_t *token);
+#ifdef RE_ENABLE_I18N
+static reg_errcode_t build_equiv_class (bitset_t sbcset,
+					re_charset_t *mbcset,
+					int *equiv_class_alloc,
+					const unsigned char *name);
+static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
+				      bitset_t sbcset,
+				      re_charset_t *mbcset,
+				      int *char_class_alloc,
+				      const unsigned char *class_name,
+				      reg_syntax_t syntax);
+#else  /* not RE_ENABLE_I18N */
+static reg_errcode_t build_equiv_class (bitset_t sbcset,
+					const unsigned char *name);
+static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
+				      bitset_t sbcset,
+				      const unsigned char *class_name,
+				      reg_syntax_t syntax);
+#endif /* not RE_ENABLE_I18N */
+static bin_tree_t *build_charclass_op (re_dfa_t *dfa,
+				       RE_TRANSLATE_TYPE trans,
+				       const unsigned char *class_name,
+				       const unsigned char *extra,
+				       int non_match, reg_errcode_t *err);
+static bin_tree_t *create_tree (re_dfa_t *dfa,
+				bin_tree_t *left, bin_tree_t *right,
+				re_token_type_t type);
+static bin_tree_t *create_token_tree (re_dfa_t *dfa,
+				      bin_tree_t *left, bin_tree_t *right,
+				      const re_token_t *token);
+static bin_tree_t *duplicate_tree (const bin_tree_t *src, re_dfa_t *dfa);
+static void free_token (re_token_t *node);
+static reg_errcode_t free_tree (void *extra, bin_tree_t *node);
+static reg_errcode_t mark_opt_subexp (void *extra, bin_tree_t *node);
+
+/* This table gives an error message for each of the error codes listed
+   in regex.h.  Obviously the order here has to be same as there.
+   POSIX doesn't require that we do anything for REG_NOERROR,
+   but why not be nice?
+
+   All messages are concatenated into one char array, separated by an
+   explicit "\0".  Each REG_*_IDX macro is the byte offset of its message
+   inside the array: it is the previous offset plus `sizeof' of the
+   previous message literal, which counts that literal's terminating NUL.
+   A message and the `sizeof' operand in the following #define must
+   therefore always be changed together.  */
+
+const char __re_error_msgid[] attribute_hidden =
+  {
+#define REG_NOERROR_IDX	0
+    gettext_noop ("Success")	/* REG_NOERROR */
+    "\0"
+#define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
+    gettext_noop ("No match")	/* REG_NOMATCH */
+    "\0"
+#define REG_BADPAT_IDX	(REG_NOMATCH_IDX + sizeof "No match")
+    gettext_noop ("Invalid regular expression") /* REG_BADPAT */
+    "\0"
+#define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
+    gettext_noop ("Invalid collation character") /* REG_ECOLLATE */
+    "\0"
+#define REG_ECTYPE_IDX	(REG_ECOLLATE_IDX + sizeof "Invalid collation character")
+    gettext_noop ("Invalid character class name") /* REG_ECTYPE */
+    "\0"
+#define REG_EESCAPE_IDX	(REG_ECTYPE_IDX + sizeof "Invalid character class name")
+    gettext_noop ("Trailing backslash") /* REG_EESCAPE */
+    "\0"
+#define REG_ESUBREG_IDX	(REG_EESCAPE_IDX + sizeof "Trailing backslash")
+    gettext_noop ("Invalid back reference") /* REG_ESUBREG */
+    "\0"
+#define REG_EBRACK_IDX	(REG_ESUBREG_IDX + sizeof "Invalid back reference")
+    gettext_noop ("Unmatched [ or [^")	/* REG_EBRACK */
+    "\0"
+#define REG_EPAREN_IDX	(REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
+    gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */
+    "\0"
+#define REG_EBRACE_IDX	(REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
+    gettext_noop ("Unmatched \\{") /* REG_EBRACE */
+    "\0"
+#define REG_BADBR_IDX	(REG_EBRACE_IDX + sizeof "Unmatched \\{")
+    gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */
+    "\0"
+#define REG_ERANGE_IDX	(REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
+    gettext_noop ("Invalid range end")	/* REG_ERANGE */
+    "\0"
+#define REG_ESPACE_IDX	(REG_ERANGE_IDX + sizeof "Invalid range end")
+    gettext_noop ("Memory exhausted") /* REG_ESPACE */
+    "\0"
+#define REG_BADRPT_IDX	(REG_ESPACE_IDX + sizeof "Memory exhausted")
+    gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */
+    "\0"
+#define REG_EEND_IDX	(REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
+    gettext_noop ("Premature end of regular expression") /* REG_EEND */
+    "\0"
+#define REG_ESIZE_IDX	(REG_EEND_IDX + sizeof "Premature end of regular expression")
+    gettext_noop ("Regular expression too big") /* REG_ESIZE */
+    "\0"
+#define REG_ERPAREN_IDX	(REG_ESIZE_IDX + sizeof "Regular expression too big")
+    gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
+  };
+
+const size_t __re_error_msgid_idx[] attribute_hidden =
+  { /* Entry N is the offset in __re_error_msgid of the message for error
+       code N; re_compile_pattern and regerror index this array with the
+       error code itself, so the order must follow the message table.  */
+    REG_NOERROR_IDX,
+    REG_NOMATCH_IDX,
+    REG_BADPAT_IDX,
+    REG_ECOLLATE_IDX,
+    REG_ECTYPE_IDX,
+    REG_EESCAPE_IDX,
+    REG_ESUBREG_IDX,
+    REG_EBRACK_IDX,
+    REG_EPAREN_IDX,
+    REG_EBRACE_IDX,
+    REG_BADBR_IDX,
+    REG_ERANGE_IDX,
+    REG_ESPACE_IDX,
+    REG_BADRPT_IDX,
+    REG_EEND_IDX,
+    REG_ESIZE_IDX,
+    REG_ERPAREN_IDX
+  };
+
+/* Entry points for GNU code.  */
+
+/* re_compile_pattern is the GNU regular expression compiler: it
+   compiles PATTERN (of length LENGTH) and puts the result in BUFP.
+   Returns NULL if the pattern was valid, otherwise an error string
+   taken from __re_error_msgid.
+
+   The syntax used is the current value of re_syntax_options.
+
+   Assumes the `allocated' (and perhaps `buffer') and `translate' fields
+   are set in BUFP on entry.  */
+
+const char *
+re_compile_pattern (const char *pattern, size_t length,
+                    struct re_pattern_buffer *bufp)
+{
+  reg_errcode_t ret;
+
+  /* And GNU code determines whether or not to get register information
+     by passing null for the REGS argument to re_match, etc., not by
+     setting no_sub, unless RE_NO_SUB is set.  */
+  bufp->no_sub = !!(re_syntax_options & RE_NO_SUB);
+
+  /* Match anchors at newline.  */
+  bufp->newline_anchor = 1;
+
+  ret = re_compile_internal (bufp, pattern, length, re_syntax_options);
+
+  if (!ret)
+    return NULL;
+  return gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
+}
+#ifdef _LIBC
+weak_alias (__re_compile_pattern, re_compile_pattern)
+#endif
+
+/* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
+   also be assigned to arbitrarily: each pattern buffer stores its own
+   syntax, so it can be changed between regex compilations.  */
+/* This has no initializer because initialized variables in Emacs
+   become read-only after dumping.  */
+reg_syntax_t re_syntax_options;
+
+
+/* Specify the precise syntax of regexps for compilation.  This provides
+   for compatibility for various utilities which historically have
+   different, incompatible syntaxes.
+
+   The argument SYNTAX is a bit mask comprised of the various bits
+   defined in regex.h.  It is stored in re_syntax_options, and the value
+   that re_syntax_options held before the call is returned.  */
+
+reg_syntax_t
+re_set_syntax (reg_syntax_t syntax)
+{
+  reg_syntax_t ret = re_syntax_options;
+
+  re_syntax_options = syntax;
+  return ret;
+}
+#ifdef _LIBC
+weak_alias (__re_set_syntax, re_set_syntax)
+#endif
+
+int
+re_compile_fastmap (struct re_pattern_buffer *bufp)
+{
+  /* Rebuild BUFP->fastmap from the initial states of the compiled
+     pattern in BUFP->buffer.  The map is cleared first, then each
+     distinct initial state contributes its entries.  Always returns 0.  */
+  re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
+  char *fastmap = bufp->fastmap;
+
+  memset (fastmap, '\0', sizeof (char) * SBC_MAX);
+  re_compile_fastmap_iter (bufp, dfa->init_state, fastmap);
+  /* The other initial states are scanned only when they are not the
+     same state object as init_state.  */
+  if (dfa->init_state != dfa->init_state_word)
+    re_compile_fastmap_iter (bufp, dfa->init_state_word, fastmap);
+  if (dfa->init_state != dfa->init_state_nl)
+    re_compile_fastmap_iter (bufp, dfa->init_state_nl, fastmap);
+  if (dfa->init_state != dfa->init_state_begbuf)
+    re_compile_fastmap_iter (bufp, dfa->init_state_begbuf, fastmap);
+  bufp->fastmap_accurate = 1;
+  return 0;
+}
+#ifdef _LIBC
+weak_alias (__re_compile_fastmap, re_compile_fastmap)
+#endif
+
+static inline void
+__attribute ((always_inline))
+re_set_fastmap (char *fastmap, int icase, int ch)
+{
+  /* Mark CH in FASTMAP; when ICASE is nonzero, also mark tolower (CH).  */
+  fastmap[ch] = 1;
+  if (!icase)
+    return;
+  fastmap[tolower (ch)] = 1;
+}
+
+/* Helper function for re_compile_fastmap.
+   Compile fastmap for the initial_state INIT_STATE.
+
+   Every node in INIT_STATE->nodes is examined in turn.  A CHARACTER node
+   marks its byte, a SIMPLE_BRACKET node marks every byte whose bit is set
+   in the node's bitset, and an OP_PERIOD or END_OF_RE node marks all
+   SBC_MAX entries and ends the scan (END_OF_RE additionally sets
+   BUFP->can_be_null).  The multibyte branches are compiled in only when
+   RE_ENABLE_I18N is defined.  Entries are only ever set here, never
+   cleared; the caller clears FASTMAP beforehand.  */
+
+static void
+re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
+			 char *fastmap)
+{
+  re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
+  int node_cnt;
+  int icase = (dfa->mb_cur_max == 1 && (bufp->syntax & RE_ICASE)); /* tolower folding only when mb_cur_max is 1 */
+  for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt)
+    {
+      int node = init_state->nodes.elems[node_cnt];
+      re_token_type_t type = dfa->nodes[node].type;
+
+      if (type == CHARACTER)
+	{
+	  re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c);
+#ifdef RE_ENABLE_I18N
+	  if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
+	    {
+	      unsigned char *buf = alloca (dfa->mb_cur_max), *p;
+	      wchar_t wc;
+	      mbstate_t state;
+
+	      p = buf;
+	      *p++ = dfa->nodes[node].opr.c;
+	      while (++node < dfa->nodes_len
+		     &&	dfa->nodes[node].type == CHARACTER
+		     && dfa->nodes[node].mb_partial)
+		*p++ = dfa->nodes[node].opr.c;
+	      memset (&state, '\0', sizeof (state));
+	      if (mbrtowc (&wc, (const char *) buf, p - buf,
+			   &state) == p - buf
+		  && (__wcrtomb ((char *) buf, towlower (wc), &state)
+		      != (size_t) -1))
+		re_set_fastmap (fastmap, 0, buf[0]);
+	    }
+#endif
+	}
+      else if (type == SIMPLE_BRACKET)
+	{
+	  int i, ch;
+	  for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
+	    {
+	      int j;
+	      bitset_word_t w = dfa->nodes[node].opr.sbcset[i];
+	      for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
+		if (w & ((bitset_word_t) 1 << j))
+		  re_set_fastmap (fastmap, icase, ch);
+	    }
+	}
+#ifdef RE_ENABLE_I18N
+      else if (type == COMPLEX_BRACKET)
+	{
+	  int i;
+	  re_charset_t *cset = dfa->nodes[node].opr.mbcset;
+	  if (cset->non_match || cset->ncoll_syms || cset->nequiv_classes
+	      || cset->nranges || cset->nchar_classes)
+	    {
+# ifdef _LIBC
+	      if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0)
+		{
+		  /* In this case we want to catch the bytes which are
+		     the first byte of any collation elements.
+		     e.g. In da_DK, we want to catch 'a' since "aa"
+			  is a valid collation element, and don't catch
+			  'b' since 'b' is the only collation element
+			  which starts from 'b'.  */
+		  const int32_t *table = (const int32_t *)
+		    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
+		  for (i = 0; i < SBC_MAX; ++i)
+		    if (table[i] < 0)
+		      re_set_fastmap (fastmap, icase, i);
+		}
+# else
+	      if (dfa->mb_cur_max > 1)
+		for (i = 0; i < SBC_MAX; ++i)
+		  if (__btowc (i) == WEOF)
+		    re_set_fastmap (fastmap, icase, i);
+# endif /* not _LIBC */
+	    }
+	  for (i = 0; i < cset->nmbchars; ++i)
+	    {
+	      char buf[256];
+	      mbstate_t state;
+	      memset (&state, '\0', sizeof (state));
+	      if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1)
+		re_set_fastmap (fastmap, icase, *(unsigned char *) buf);
+	      if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
+		{
+		  if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state)
+		      != (size_t) -1)
+		    re_set_fastmap (fastmap, 0, *(unsigned char *) buf);
+		}
+	    }
+	}
+#endif /* RE_ENABLE_I18N */
+      else if (type == OP_PERIOD
+#ifdef RE_ENABLE_I18N
+	       || type == OP_UTF8_PERIOD
+#endif /* RE_ENABLE_I18N */
+	       || type == END_OF_RE)
+	{
+	  memset (fastmap, '\1', sizeof (char) * SBC_MAX);
+	  if (type == END_OF_RE)
+	    bufp->can_be_null = 1;
+	  return; /* every entry is already set, so the remaining nodes cannot add anything */
+	}
+    }
+}
+
+/* Entry point for POSIX code.  */
+/* regcomp takes a regular expression as a string and compiles it.
+
+   PREG is a regex_t *.  We do not expect any fields to be initialized,
+   since POSIX says we shouldn't.  Thus, we set
+
+     `buffer' to the compiled pattern;
+     `used' to the length of the compiled pattern;
+     `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
+       REG_EXTENDED bit in CFLAGS is set; otherwise, to
+       RE_SYNTAX_POSIX_BASIC;
+     `newline_anchor' to REG_NEWLINE being set in CFLAGS;
+     `fastmap' to an allocated space for the fastmap;
+     `fastmap_accurate' to zero;
+     `re_nsub' to the number of subexpressions in PATTERN.
+
+   PATTERN is the address of the pattern string.
+
+   CFLAGS is a series of bits which affect compilation.
+
+     If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
+     use POSIX basic syntax.
+
+     If REG_NEWLINE is set, then . and [^...] don't match newline.
+     Also, regexec will try a match beginning after every newline.
+
+     If REG_ICASE is set, then we consider upper- and lowercase
+     versions of letters to be equivalent when matching.
+
+     If REG_NOSUB is set, then when PREG is passed to regexec, that
+     routine will report only success or failure, and nothing about the
+     registers.
+
+   It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
+   the return codes and their meanings.)  When compilation fails, the
+   fastmap allocated here is released again and PREG->fastmap is NULL.  */
+
+int
+regcomp (regex_t *__restrict preg, const char *__restrict pattern,
+         int cflags)
+{
+  reg_errcode_t ret;
+  reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED
+                         : RE_SYNTAX_POSIX_BASIC);
+
+  preg->buffer = NULL;
+  preg->allocated = 0;
+  preg->used = 0;
+
+  /* Try to allocate space for the fastmap.  */
+  preg->fastmap = re_malloc (char, SBC_MAX);
+  if (BE (preg->fastmap == NULL, 0))
+    return REG_ESPACE;
+
+  syntax |= (cflags & REG_ICASE) ? RE_ICASE : 0;
+
+  /* If REG_NEWLINE is set, newlines are treated differently.  */
+  if (cflags & REG_NEWLINE)
+    { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
+      syntax &= ~RE_DOT_NEWLINE;
+      syntax |= RE_HAT_LISTS_NOT_NEWLINE;
+      /* It also changes the matching behavior.  */
+      preg->newline_anchor = 1;
+    }
+  else
+    preg->newline_anchor = 0;
+  preg->no_sub = !!(cflags & REG_NOSUB);
+  preg->translate = NULL;
+
+  ret = re_compile_internal (preg, pattern, strlen (pattern), syntax);
+
+  /* POSIX doesn't distinguish between an unmatched open-group and an
+     unmatched close-group: both are REG_EPAREN.  */
+  if (ret == REG_ERPAREN)
+    ret = REG_EPAREN;
+
+  /* We have already checked preg->fastmap != NULL.  */
+  if (BE (ret == REG_NOERROR, 1))
+    /* Compute the fastmap now, since regexec cannot modify the pattern
+       buffer.  This function never fails in this implementation.  */
+    (void) re_compile_fastmap (preg);
+  else
+    {
+      /* Some error occurred while compiling the expression.  */
+      re_free (preg->fastmap);
+      preg->fastmap = NULL;
+    }
+
+  return (int) ret;
+}
+#ifdef _LIBC
+weak_alias (__regcomp, regcomp)
+#endif
+
+/* Returns a message corresponding to an error code, ERRCODE, returned
+   from either regcomp or regexec.   We don't use PREG here.  */
+
+/* regerror ( int errcode, preg, errbuf, errbuf_size) */
+size_t
+regerror (
+    int errcode,
+    const regex_t *__restrict preg,
+    char *__restrict errbuf,
+    size_t errbuf_size)
+{
+  const char *msg;
+  size_t msg_size;
+
+  if (BE (errcode < 0
+	  || errcode >= (int) (sizeof (__re_error_msgid_idx)
+			       / sizeof (__re_error_msgid_idx[0])), 0))
+    /* Only error codes returned by the rest of the code should be passed
+       to this routine.  If we are given anything else, or if other regex
+       code generates an invalid error code, then the program has a bug.
+       Dump core so we can fix it.  */
+    abort ();
+
+  msg = gettext (__re_error_msgid + __re_error_msgid_idx[errcode]);
+
+  msg_size = strlen (msg) + 1; /* Includes the null.  */
+
+  if (BE (errbuf_size != 0, 1))
+    {
+      if (BE (msg_size > errbuf_size, 0))
+	{
+#if defined HAVE_MEMPCPY || defined _LIBC
+	  *((char *) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0';
+#else
+	  memcpy (errbuf, msg, errbuf_size - 1);
+	  errbuf[errbuf_size - 1] = 0;
+#endif
+	}
+      else
+	memcpy (errbuf, msg, msg_size);
+    }
+
+  return msg_size;
+}
+#ifdef _LIBC
+weak_alias (__regerror, regerror)
+#endif
+
+
+#ifdef RE_ENABLE_I18N
+/* This static array is used for the map to single-byte characters when
+   UTF-8 is used.  Otherwise we would allocate memory just to initialize
+   it the same all the time.  UTF-8 is the preferred encoding so this is
+   a worthwhile optimization.
+
+   Because the array is static, free_dfa_content compares dfa->sb_char
+   against it and skips the free when they are the same object.  The
+   initializer below relies on the GNU C range designator `[a ... b]'.  */
+static const bitset_t utf8_sb_map =
+{
+  /* Set the first 128 bits.  */
+  [0 ... 0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX
+};
+#endif
+
+
+static void
+free_dfa_content (re_dfa_t *dfa)
+{
+  /* Release everything owned by DFA -- the node array, the per-node
+     sets, the state table with all its states, the auxiliary maps --
+     and finally DFA itself.  */
+  int node_idx, bucket_idx, state_idx;
+
+  if (dfa->nodes)
+    {
+      for (node_idx = 0; node_idx < dfa->nodes_len; ++node_idx)
+        free_token (dfa->nodes + node_idx);
+    }
+  re_free (dfa->nexts);
+
+  /* Free the contents of the per-node sets before the arrays that hold
+     them; any of the three arrays may be absent.  */
+  for (node_idx = 0; node_idx < dfa->nodes_len; ++node_idx)
+    {
+      if (dfa->eclosures != NULL)
+        re_node_set_free (dfa->eclosures + node_idx);
+      if (dfa->inveclosures != NULL)
+        re_node_set_free (dfa->inveclosures + node_idx);
+      if (dfa->edests != NULL)
+        re_node_set_free (dfa->edests + node_idx);
+    }
+  re_free (dfa->edests);
+  re_free (dfa->eclosures);
+  re_free (dfa->inveclosures);
+  re_free (dfa->nodes);
+
+  if (dfa->state_table)
+    {
+      for (bucket_idx = 0; bucket_idx <= dfa->state_hash_mask; ++bucket_idx)
+        {
+          struct re_state_table_entry *bucket = dfa->state_table + bucket_idx;
+          for (state_idx = 0; state_idx < bucket->num; ++state_idx)
+            free_state (bucket->array[state_idx]);
+          re_free (bucket->array);
+        }
+    }
+  re_free (dfa->state_table);
+#ifdef RE_ENABLE_I18N
+  /* utf8_sb_map is a static array and must not be freed.  */
+  if (dfa->sb_char != utf8_sb_map)
+    re_free (dfa->sb_char);
+#endif
+  re_free (dfa->subexp_map);
+#ifdef DEBUG
+  re_free (dfa->re_str);
+#endif
+
+  re_free (dfa);
+}
+
+
+/* Free dynamically allocated space used by PREG.
+
+   Releases the compiled DFA stored in PREG->buffer (if any), the
+   fastmap and the translate table, and resets those members so that a
+   second call on the same PREG finds nothing left to free.  PREG
+   itself is not freed.  */
+
+void
+regfree (regex_t *preg)
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  if (BE (dfa != NULL, 1))
+    free_dfa_content (dfa);
+  preg->buffer = NULL;
+  preg->allocated = 0;
+
+  re_free (preg->fastmap);
+  preg->fastmap = NULL;
+
+  re_free (preg->translate);
+  preg->translate = NULL;
+}
+#ifdef _LIBC
+weak_alias (__regfree, regfree)
+#endif
+
+/* Entry points compatible with 4.2 BSD regex library.  We don't define
+   them unless specifically requested.  */
+
+#if defined _REGEX_RE_COMP || defined _LIBC
+
+/* BSD has one and only one pattern buffer.  */
+static struct re_pattern_buffer re_comp_buf;
+
+/* Compile S into the single static BSD pattern buffer.
+
+   If S is NULL, nothing is compiled: NULL is returned when a pattern
+   was compiled earlier, otherwise an error message.  For a non-NULL S
+   the previous pattern (if any) is released while its fastmap buffer
+   is kept for reuse, and S is compiled with the current
+   re_syntax_options.
+
+   Returns NULL on success, or a message string describing the error.  */
+char *
+# ifdef _LIBC
+/* Make these definitions weak in libc, so POSIX programs can redefine
+   these names if they don't use our functions, and still use
+   regcomp/regexec above without link errors.  */
+weak_function
+# endif
+re_comp (const char *s)
+{
+  reg_errcode_t ret;
+  char *fastmap;
+
+  if (!s)
+    {
+      if (!re_comp_buf.buffer)
+	return (char *) gettext ("No previous regular expression");
+      return NULL;
+    }
+
+  if (re_comp_buf.buffer)
+    {
+      /* Detach the fastmap first so that __regfree does not release
+	 it; it is reattached after the buffer has been cleared.  */
+      fastmap = re_comp_buf.fastmap;
+      re_comp_buf.fastmap = NULL;
+      __regfree (&re_comp_buf);
+      memset (&re_comp_buf, '\0', sizeof (re_comp_buf));
+      re_comp_buf.fastmap = fastmap;
+    }
+
+  if (re_comp_buf.fastmap == NULL)
+    {
+      re_comp_buf.fastmap = (char *) malloc (SBC_MAX);
+      if (re_comp_buf.fastmap == NULL)
+	return (char *) gettext (__re_error_msgid
+				 + __re_error_msgid_idx[(int) REG_ESPACE]);
+    }
+
+  /* Since `re_exec' always passes NULL for the `regs' argument, we
+     don't need to initialize the pattern buffer fields which affect it.  */
+
+  /* Match anchors at newlines.  */
+  re_comp_buf.newline_anchor = 1;
+
+  ret = re_compile_internal (&re_comp_buf, s, strlen (s), re_syntax_options);
+
+  if (!ret)
+    return NULL;
+
+  /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
+  return (char *) gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
+}
+
+#ifdef _LIBC
+/* Release the static BSD pattern buffer used by re_comp.  Only built
+   when _LIBC is defined; the function is declared through the
+   libc_freeres_fn macro, which is defined outside this file.
+   NOTE(review): presumably run by libc's resource-freeing pass at
+   exit -- confirm against the macro's definition.  */
+libc_freeres_fn (free_mem)
+{
+  __regfree (&re_comp_buf);
+}
+#endif
+
+#endif /* _REGEX_RE_COMP */
+
+/* Internal entry point.
+   Compile the regular expression PATTERN, whose length is LENGTH.
+   SYNTAX indicates the regular expression's syntax.
+
+   The compiled DFA is stored in PREG->buffer, which is (re)allocated
+   here when PREG->allocated is smaller than a re_dfa_t.
+
+   Returns REG_NOERROR on success.  If obtaining the DFA storage fails,
+   REG_ESPACE is returned and PREG->buffer is left as it was.  On any
+   later failure the DFA is released with free_dfa_content,
+   PREG->buffer is set to NULL, PREG->allocated to 0, and the error
+   code is returned.  */
+
+static reg_errcode_t
+re_compile_internal (regex_t *preg, const char * pattern, size_t length,
+		     reg_syntax_t syntax)
+{
+  reg_errcode_t err = REG_NOERROR;
+  re_dfa_t *dfa;
+  re_string_t regexp;
+
+  /* Initialize the pattern buffer.  */
+  preg->fastmap_accurate = 0;
+  preg->syntax = syntax;
+  preg->not_bol = preg->not_eol = 0;
+  preg->used = 0;
+  preg->re_nsub = 0;
+  preg->can_be_null = 0;
+  preg->regs_allocated = REGS_UNALLOCATED;
+
+  /* Initialize the dfa.  */
+  dfa = (re_dfa_t *) preg->buffer;
+  if (BE (preg->allocated < sizeof (re_dfa_t), 0))
+    {
+      /* If zero allocated, but buffer is non-null, try to realloc
+	 enough space.  This loses if buffer's address is bogus, but
+	 that is the user's responsibility.  If ->buffer is NULL this
+	 is a simple allocation.  */
+      dfa = re_realloc (preg->buffer, re_dfa_t, 1);
+      if (dfa == NULL)
+	return REG_ESPACE;
+      preg->allocated = sizeof (re_dfa_t);
+      preg->buffer = (unsigned char *) dfa;
+    }
+  preg->used = sizeof (re_dfa_t);
+
+  err = init_dfa (dfa, length);
+  if (BE (err != REG_NOERROR, 0))
+    {
+      /* init_dfa may have allocated some members before failing;
+	 free_dfa_content releases them together with DFA itself.  */
+      free_dfa_content (dfa);
+      preg->buffer = NULL;
+      preg->allocated = 0;
+      return err;
+    }
+#ifdef DEBUG
+  /* Note: length+1 will not overflow since it is checked in init_dfa.  */
+  /* NOTE(review): the re_malloc result is not checked before strncpy
+     writes through it.  */
+  dfa->re_str = re_malloc (char, length + 1);
+  strncpy (dfa->re_str, pattern, length + 1);
+#endif
+
+  __libc_lock_init (dfa->lock);
+
+  err = re_string_construct (&regexp, pattern, length, preg->translate,
+			     syntax & RE_ICASE, dfa);
+  if (BE (err != REG_NOERROR, 0))
+    {
+      /* Shared failure exit: the parse and analyze steps below jump
+	 here with ERR already set.  */
+    re_compile_internal_free_return:
+      free_workarea_compile (preg);
+      re_string_destruct (&regexp);
+      free_dfa_content (dfa);
+      preg->buffer = NULL;
+      preg->allocated = 0;
+      return err;
+    }
+
+  /* Parse the regular expression, and build a structure tree.  */
+  preg->re_nsub = 0;
+  dfa->str_tree = parse (&regexp, preg, syntax, &err);
+  if (BE (dfa->str_tree == NULL, 0))
+    goto re_compile_internal_free_return;
+
+  /* Analyze the tree and create the nfa.  */
+  err = analyze (preg);
+  if (BE (err != REG_NOERROR, 0))
+    goto re_compile_internal_free_return;
+
+#ifdef RE_ENABLE_I18N
+  /* If possible, do searching in single byte encoding to speed things up.  */
+  if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL)
+    optimize_utf8 (dfa);
+#endif
+
+  /* Then create the initial state of the dfa.  */
+  err = create_initial_state (dfa);
+
+  /* Release work areas.  */
+  free_workarea_compile (preg);
+  re_string_destruct (&regexp);
+
+  /* The work areas are released whether or not the initial state could
+     be created; only the DFA teardown depends on ERR.  */
+  if (BE (err != REG_NOERROR, 0))
+    {
+      free_dfa_content (dfa);
+      preg->buffer = NULL;
+      preg->allocated = 0;
+    }
+
+  return err;
+}
+
+/* Initialize DFA.  We use the length of the regular expression PAT_LEN
+   as the initial length of some arrays.
+
+   Returns REG_NOERROR on success.  Returns REG_ESPACE if PAT_LEN is
+   too large for the array sizes to be computed without overflow, or
+   if an allocation fails.  DFA is zeroed first, so after a failure it
+   holds only NULL or successfully allocated members; the caller
+   releases them with free_dfa_content.  */
+
+static reg_errcode_t
+init_dfa (re_dfa_t *dfa, size_t pat_len)
+{
+  unsigned int table_size;
+  size_t max_elem_size;
+#ifndef _LIBC
+  char *codeset_name;
+#endif
+
+  memset (dfa, '\0', sizeof (re_dfa_t));
+
+  /* Force allocation of str_tree_storage the first time.  */
+  dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
+
+  /* Avoid overflows.  PAT_LEN + 1 elements of the largest per-node
+     object must fit in a size_t byte count (the extra "/ 2" leaves
+     room for later doubling), and the power of two computed below must
+     be representable in TABLE_SIZE: otherwise the doubling would wrap
+     to zero and the loop would never terminate.  */
+  max_elem_size = sizeof (re_token_t);
+  if (max_elem_size < sizeof (re_node_set))
+    max_elem_size = sizeof (re_node_set);
+  if (pat_len == SIZE_MAX
+      || pat_len >= SIZE_MAX / max_elem_size / 2
+      || pat_len > ((unsigned int) -1) / 2)
+    return REG_ESPACE;
+
+  dfa->nodes_alloc = pat_len + 1;
+  dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc);
+
+  /* table_size = the smallest power of two greater than pat_len.  */
+  table_size = 1;
+  while (table_size <= pat_len)
+    table_size <<= 1;
+
+  dfa->state_table = calloc (table_size, sizeof (struct re_state_table_entry));
+  dfa->state_hash_mask = table_size - 1;
+
+  dfa->mb_cur_max = MB_CUR_MAX;
+#ifdef _LIBC
+  if (dfa->mb_cur_max == 6
+      && strcmp (_NL_CURRENT (LC_CTYPE, _NL_CTYPE_CODESET_NAME), "UTF-8") == 0)
+    dfa->is_utf8 = 1;
+  dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII)
+		       != 0);
+#else
+# ifdef HAVE_LANGINFO_CODESET
+  codeset_name = nl_langinfo (CODESET);
+# else
+  /* Without nl_langinfo, take the text after the '.' of the first
+     non-empty variable among LC_ALL, LC_CTYPE and LANG.  */
+  codeset_name = getenv ("LC_ALL");
+  if (codeset_name == NULL || codeset_name[0] == '\0')
+    codeset_name = getenv ("LC_CTYPE");
+  if (codeset_name == NULL || codeset_name[0] == '\0')
+    codeset_name = getenv ("LANG");
+  if (codeset_name == NULL)
+    codeset_name = "";
+  else if (strchr (codeset_name, '.') !=  NULL)
+    codeset_name = strchr (codeset_name, '.') + 1;
+# endif
+
+  if (strcasecmp (codeset_name, "UTF-8") == 0
+      || strcasecmp (codeset_name, "UTF8") == 0)
+    dfa->is_utf8 = 1;
+
+  /* We check exhaustively in the loop below if this charset is a
+     superset of ASCII.  */
+  dfa->map_notascii = 0;
+#endif
+
+#ifdef RE_ENABLE_I18N
+  if (dfa->mb_cur_max > 1)
+    {
+      if (dfa->is_utf8)
+	dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map;
+      else
+	{
+	  int i, j, ch;
+
+	  dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
+	  if (BE (dfa->sb_char == NULL, 0))
+	    return REG_ESPACE;
+
+	  /* Set the bits corresponding to single byte chars.  */
+	  for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
+	    for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
+	      {
+		wint_t wch = __btowc (ch);
+		if (wch != WEOF)
+		  dfa->sb_char[i] |= (bitset_word_t) 1 << j;
+# ifndef _LIBC
+		if (isascii (ch) && wch != ch)
+		  dfa->map_notascii = 1;
+# endif
+	      }
+	}
+    }
+#endif
+
+  if (BE (dfa->nodes == NULL || dfa->state_table == NULL, 0))
+    return REG_ESPACE;
+  return REG_NOERROR;
+}
+
+/* Fill in DFA's WORD_CHAR bitset, which records which characters count
+   as "word" constituents for operators such as "\<" and "\>", and
+   record in DFA that word operators are in use.  A character is a
+   word character when isalnum accepts it or it is '_'.  */
+
+static void
+internal_function
+init_word_char (re_dfa_t *dfa)
+{
+  const int n_chars = BITSET_WORDS * BITSET_WORD_BITS;
+  int ch;
+
+  dfa->word_ops_used = 1;
+  for (ch = 0; ch < n_chars; ++ch)
+    {
+      /* Bit (ch % BITSET_WORD_BITS) of word (ch / BITSET_WORD_BITS).  */
+      if (isalnum (ch) || ch == '_')
+	dfa->word_char[ch / BITSET_WORD_BITS]
+	  |= (bitset_word_t) 1 << (ch % BITSET_WORD_BITS);
+    }
+}
+
+/* Free the work areas that are only used while compiling: the chain of
+   parse-tree storage chunks and the ORG_INDICES array.  The matching
+   DFA members are reset so that the areas read as unallocated.  */
+
+static void
+free_workarea_compile (regex_t *preg)
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  bin_tree_storage_t *chunk = dfa->str_tree_storage;
+
+  /* Walk the singly linked chunk list, saving the link before each
+     chunk is released.  */
+  while (chunk != NULL)
+    {
+      bin_tree_storage_t *following = chunk->next;
+      re_free (chunk);
+      chunk = following;
+    }
+  dfa->str_tree_storage = NULL;
+  dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
+  dfa->str_tree = NULL;
+
+  re_free (dfa->org_indices);
+  dfa->org_indices = NULL;
+}
+
+/* Create initial states for all contexts.
+
+   Builds the node set of the initial state from the epsilon closure of
+   the first node of the expression and stores the resulting states in
+   DFA->init_state, init_state_word, init_state_nl and
+   init_state_begbuf.
+
+   Returns REG_NOERROR on success, otherwise the error code reported by
+   the failing node-set or state operation.  The temporary node set is
+   released on every path.  */
+
+static reg_errcode_t
+create_initial_state (re_dfa_t *dfa)
+{
+  int first, i;
+  reg_errcode_t err;
+  re_node_set init_nodes;
+
+  /* Initial states have the epsilon closure of the node which is
+     the first node of the regular expression.  */
+  first = dfa->str_tree->first->node_idx;
+  dfa->init_node = first;
+  err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first);
+  if (BE (err != REG_NOERROR, 0))
+    return err;
+
+  /* The back-references which are in initial states can epsilon transit,
+     since in this case all of the subexpressions can be null.
+     Then we add epsilon closures of the nodes which are the next nodes of
+     the back-references.  */
+  if (dfa->nbackref > 0)
+    for (i = 0; i < init_nodes.nelem; ++i)
+      {
+	int node_idx = init_nodes.elems[i];
+	int clexp_idx, dest_idx;
+
+	if (dfa->nodes[node_idx].type != OP_BACK_REF)
+	  continue;
+
+	/* Look for the OP_CLOSE_SUBEXP of the referenced subexpression
+	   among the initial nodes.  */
+	for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx)
+	  {
+	    re_token_t *clexp_node;
+	    clexp_node = dfa->nodes + init_nodes.elems[clexp_idx];
+	    if (clexp_node->type == OP_CLOSE_SUBEXP
+		&& clexp_node->opr.idx == dfa->nodes[node_idx].opr.idx)
+	      break;
+	  }
+	if (clexp_idx == init_nodes.nelem)
+	  continue;
+
+	dest_idx = dfa->edests[node_idx].elems[0];
+	if (!re_node_set_contains (&init_nodes, dest_idx))
+	  {
+	    err = re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx);
+	    if (BE (err != REG_NOERROR, 0))
+	      goto free_return;
+	    /* The set changed, so restart the scan.  */
+	    i = 0;
+	  }
+      }
+
+  /* It must be the first time to invoke acquire_state.  */
+  dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0);
+  /* We don't check ERR here, since the initial state must not be NULL.  */
+  if (BE (dfa->init_state == NULL, 0))
+    goto free_return;
+  if (dfa->init_state->has_constraint)
+    {
+      dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes,
+						       CONTEXT_WORD);
+      dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes,
+						     CONTEXT_NEWLINE);
+      dfa->init_state_begbuf = re_acquire_state_context (&err, dfa,
+							 &init_nodes,
+							 CONTEXT_NEWLINE
+							 | CONTEXT_BEGBUF);
+      if (BE (dfa->init_state_word == NULL || dfa->init_state_nl == NULL
+	      || dfa->init_state_begbuf == NULL, 0))
+	goto free_return;
+    }
+  else
+    dfa->init_state_word = dfa->init_state_nl
+      = dfa->init_state_begbuf = dfa->init_state;
+
+  err = REG_NOERROR;
+
+ free_return:
+  /* INIT_NODES is only a scratch set; release it whether or not the
+     states could be created.  */
+  re_node_set_free (&init_nodes);
+  return err;
+}
+
+#ifdef RE_ENABLE_I18N
+/* If it is possible to do searching in single byte encoding instead of UTF-8
+   to speed things up, set dfa->mb_cur_max to 1, clear is_utf8 and change
+   DFA nodes where needed.
+
+   The first pass inspects every node and returns without touching DFA
+   as soon as it meets a construct that rules the optimization out.
+   Only if all nodes pass does the second part modify DFA.  */
+
+static void
+optimize_utf8 (re_dfa_t *dfa)
+{
+  int node, i, mb_chars = 0, has_period = 0;
+
+  for (node = 0; node < dfa->nodes_len; ++node)
+    switch (dfa->nodes[node].type)
+      {
+      case CHARACTER:
+	/* Remember that at least one byte >= 0x80 occurs.  */
+	if (dfa->nodes[node].opr.c >= 0x80)
+	  mb_chars = 1;
+	break;
+      case ANCHOR:
+	switch (dfa->nodes[node].opr.idx)
+	  {
+	  case LINE_FIRST:
+	  case LINE_LAST:
+	  case BUF_FIRST:
+	  case BUF_LAST:
+	    break;
+	  default:
+	    /* Word anchors etc. cannot be handled.  */
+	    return;
+	  }
+	break;
+      case OP_PERIOD:
+        has_period = 1;
+        break;
+      case OP_BACK_REF:
+      case OP_ALT:
+      case END_OF_RE:
+      case OP_DUP_ASTERISK:
+      case OP_OPEN_SUBEXP:
+      case OP_CLOSE_SUBEXP:
+	break;
+      case COMPLEX_BRACKET:
+	return;
+      case SIMPLE_BRACKET:
+	/* Just double check.  The non-ASCII range starts at 0x80.  */
+	assert (0x80 % BITSET_WORD_BITS == 0);
+	/* Give up if the bracket has any bit set at 0x80 or above.  */
+        for (i = 0x80 / BITSET_WORD_BITS; i < BITSET_WORDS; ++i)
+	  if (dfa->nodes[node].opr.sbcset[i])
+	    return;
+	break;
+      default:
+	/* Any node type not listed above is treated as a program bug.  */
+	abort ();
+      }
+
+  /* Rewrite nodes only when there is something to rewrite: clear
+     mb_partial on CHARACTER nodes >= 0x80 and turn OP_PERIOD nodes
+     into OP_UTF8_PERIOD.  */
+  if (mb_chars || has_period)
+    for (node = 0; node < dfa->nodes_len; ++node)
+      {
+	if (dfa->nodes[node].type == CHARACTER
+	    && dfa->nodes[node].opr.c >= 0x80)
+	  dfa->nodes[node].mb_partial = 0;
+	else if (dfa->nodes[node].type == OP_PERIOD)
+	  dfa->nodes[node].type = OP_UTF8_PERIOD;
+      }
+
+  /* The search can be in single byte locale.  */
+  dfa->mb_cur_max = 1;
+  dfa->is_utf8 = 0;
+  dfa->has_mb_node = dfa->nbackref > 0 || has_period;
+}
+#endif
+
+/* Analyze the structure tree of PREG's DFA, and calculate "first",
+   "next", "edest", "eclosure", and (only when it will be needed)
+   "inveclosure".
+
+   Returns REG_NOERROR on success, REG_ESPACE when one of the arrays
+   cannot be allocated, or the error code of the first pass that
+   fails.  */
+
+static reg_errcode_t
+analyze (regex_t *preg)
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  reg_errcode_t status;
+
+  /* Allocate the per-node arrays, one slot per reserved node.  */
+  dfa->nexts = re_malloc (int, dfa->nodes_alloc);
+  dfa->org_indices = re_malloc (int, dfa->nodes_alloc);
+  dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc);
+  dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc);
+  if (BE (!(dfa->nexts != NULL && dfa->org_indices != NULL
+	    && dfa->edests != NULL && dfa->eclosures != NULL), 0))
+    return REG_ESPACE;
+
+  /* The subexpression map is optional: when it cannot be allocated the
+     optimize_subexps pass is simply skipped.  */
+  dfa->subexp_map = re_malloc (int, preg->re_nsub);
+  if (dfa->subexp_map != NULL)
+    {
+      int i;
+      int is_identity = 1;
+
+      for (i = 0; i < preg->re_nsub; i++)
+	dfa->subexp_map[i] = i;
+      preorder (dfa->str_tree, optimize_subexps, dfa);
+
+      /* A map that is still the identity carries no information.  */
+      for (i = 0; is_identity && i < preg->re_nsub; i++)
+	if (dfa->subexp_map[i] != i)
+	  is_identity = 0;
+      if (is_identity)
+	{
+	  free (dfa->subexp_map);
+	  dfa->subexp_map = NULL;
+	}
+    }
+
+  /* Run the passes in order, stopping at the first failure.  */
+  status = postorder (dfa->str_tree, lower_subexps, preg);
+  if (BE (status == REG_NOERROR, 1))
+    status = postorder (dfa->str_tree, calc_first, dfa);
+  if (BE (status == REG_NOERROR, 1))
+    {
+      preorder (dfa->str_tree, calc_next, dfa);
+      status = preorder (dfa->str_tree, link_nfa_nodes, dfa);
+    }
+  if (BE (status == REG_NOERROR, 1))
+    status = calc_eclosure (dfa);
+  if (BE (status != REG_NOERROR, 0))
+    return status;
+
+  /* We only need this during the prune_impossible_nodes pass in regexec.c;
+     skip it if p_i_n will not run, as calc_inveclosure can be quadratic.  */
+  if (!dfa->nbackref
+      && (preg->no_sub || preg->re_nsub == 0 || !dfa->has_plural_match))
+    return status;
+
+  dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len);
+  if (BE (dfa->inveclosures == NULL, 0))
+    return REG_ESPACE;
+  return calc_inveclosure (dfa);
+}
+
+/* Our parse trees are very unbalanced, so we cannot use a stack to
+   implement parse tree visits.  Instead, we use parent pointers and
+   some hairy code in these two functions.  */
+/* Visit the tree rooted at ROOT in postorder (children before their
+   parent), calling FN (EXTRA, node) on each node.  The walk stops at
+   the first call that does not return REG_NOERROR and returns that
+   code; otherwise it returns REG_NOERROR once the node without a
+   parent has been visited.  */
+static reg_errcode_t
+postorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)),
+	   void *extra)
+{
+  bin_tree_t *node, *prev;
+
+  for (node = root; ; )
+    {
+      /* Descend down the tree, preferably to the left (or to the right
+	 if that's the only child).  */
+      while (node->left || node->right)
+	if (node->left)
+          node = node->left;
+        else
+          node = node->right;
+
+      do
+	{
+	  /* NODE's subtrees are done at this point, so visit NODE.  */
+	  reg_errcode_t err = fn (extra, node);
+	  if (BE (err != REG_NOERROR, 0))
+	    return err;
+          if (node->parent == NULL)
+	    return REG_NOERROR;
+	  prev = node;
+	  node = node->parent;
+	}
+      /* Go up while we have a node that is reached from the right.  */
+      while (node->right == prev || node->right == NULL);
+      /* We came up from the left child and a right subtree exists:
+	 process that subtree before visiting NODE itself.  */
+      node = node->right;
+    }
+}
+
+/* Visit the tree rooted at ROOT in preorder (each node before its
+   children), calling FN (EXTRA, node) on each node.  Like postorder,
+   this walks by parent pointers instead of using a stack.  The walk
+   stops at the first call that does not return REG_NOERROR and returns
+   that code; otherwise it returns REG_NOERROR after climbing past the
+   node that has no parent.  */
+static reg_errcode_t
+preorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)),
+	  void *extra)
+{
+  bin_tree_t *node;
+
+  for (node = root; ; )
+    {
+      reg_errcode_t err = fn (extra, node);
+      if (BE (err != REG_NOERROR, 0))
+	return err;
+
+      /* Go to the left node, or up and to the right.  */
+      if (node->left)
+	node = node->left;
+      else
+	{
+	  /* Climb until reaching a node whose right subtree exists and
+	     is not the one we just came up from.  */
+	  bin_tree_t *prev = NULL;
+	  while (node->right == prev || node->right == NULL)
+	    {
+	      prev = node;
+	      node = node->parent;
+	      if (!node)
+	        return REG_NOERROR;
+	    }
+	  node = node->right;
+	}
+    }
+}
+
+/* Optimization pass: if a SUBEXP is entirely contained, strip it and tell
+   re_search_internal to map the inner one's opr.idx to this one's.  Adjust
+   backreferences as well.  Requires a preorder visit.
+
+   EXTRA is the re_dfa_t being built; NODE is the tree node being
+   visited.  Always returns REG_NOERROR.  */
+static reg_errcode_t
+optimize_subexps (void *extra, bin_tree_t *node)
+{
+  re_dfa_t *dfa = (re_dfa_t *) extra;
+
+  if (node->token.type == OP_BACK_REF && dfa->subexp_map)
+    {
+      /* Redirect the back-reference through the map and mark the
+	 subexpression it now refers to as used.  The mask is built in
+	 bitset_word_t width, like the clearing code below.  */
+      int idx = node->token.opr.idx;
+      node->token.opr.idx = dfa->subexp_map[idx];
+      dfa->used_bkref_map |= (bitset_word_t) 1 << node->token.opr.idx;
+    }
+
+  else if (node->token.type == SUBEXP
+           && node->left && node->left->token.type == SUBEXP)
+    {
+      /* A SUBEXP whose body is itself a SUBEXP: splice the inner one
+	 out of the tree and let its index alias the outer one.  */
+      int other_idx = node->left->token.opr.idx;
+
+      node->left = node->left->left;
+      if (node->left)
+        node->left->parent = node;
+
+      dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx];
+      if (other_idx < BITSET_WORD_BITS)
+	  dfa->used_bkref_map &= ~((bitset_word_t) 1 << other_idx);
+    }
+
+  return REG_NOERROR;
+}
+
+/* Lowering pass: Turn each SUBEXP node into the appropriate concatenation
+   of OP_OPEN_SUBEXP, the body of the SUBEXP (if any) and OP_CLOSE_SUBEXP.
+
+   EXTRA is the regex_t being compiled.  Each child of NODE that is a
+   SUBEXP is replaced by what lower_subexp returns for it.  Returns the
+   error code left by lower_subexp, or REG_NOERROR.  */
+static reg_errcode_t
+lower_subexps (void *extra, bin_tree_t *node)
+{
+  regex_t *preg = (regex_t *) extra;
+  reg_errcode_t err = REG_NOERROR;
+  bin_tree_t **child_slot[2];
+  int side;
+
+  /* Handle the left child first, then the right one.  */
+  child_slot[0] = &node->left;
+  child_slot[1] = &node->right;
+  for (side = 0; side < 2; ++side)
+    {
+      bin_tree_t **slot = child_slot[side];
+
+      if (*slot == NULL || (*slot)->token.type != SUBEXP)
+	continue;
+      *slot = lower_subexp (&err, preg, *slot);
+      if (*slot != NULL)
+	(*slot)->parent = node;
+    }
+
+  return err;
+}
+
+/* Lower the single SUBEXP node NODE.  Returns the tree that replaces
+   it: either the bare body (when the group need not be tracked) or a
+   concatenation of OP_OPEN_SUBEXP, the body and OP_CLOSE_SUBEXP.
+   On allocation failure stores REG_ESPACE in *ERR and returns NULL.  */
+static bin_tree_t *
+lower_subexp (reg_errcode_t *err, regex_t *preg, bin_tree_t *node)
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  bin_tree_t *inner = node->left;
+  bin_tree_t *open_node, *close_node, *tail, *result;
+
+  /* We do not optimize empty subexpressions, because otherwise we may
+     have bad CONCAT nodes with NULL children.  This is obviously not
+     very common, so we do not lose much.  An example that triggers
+     this case is the sed "script" /\(\)/x.  */
+  if (preg->no_sub && inner != NULL)
+    {
+      /* With no_sub set, the markers are needed only when a
+	 back-reference uses this group.  */
+      if (node->token.opr.idx >= BITSET_WORD_BITS
+	  || (dfa->used_bkref_map
+	      & ((bitset_word_t) 1 << node->token.opr.idx)) == 0)
+	return inner;
+    }
+
+  /* Convert the SUBEXP node to the concatenation of an
+     OP_OPEN_SUBEXP, the contents, and an OP_CLOSE_SUBEXP.  */
+  open_node = create_tree (dfa, NULL, NULL, OP_OPEN_SUBEXP);
+  close_node = create_tree (dfa, NULL, NULL, OP_CLOSE_SUBEXP);
+  if (inner != NULL)
+    tail = create_tree (dfa, inner, close_node, CONCAT);
+  else
+    tail = close_node;
+  result = create_tree (dfa, open_node, tail, CONCAT);
+  if (BE (result == NULL || tail == NULL || open_node == NULL
+	  || close_node == NULL, 0))
+    {
+      *err = REG_ESPACE;
+      return NULL;
+    }
+
+  /* Both markers carry the group's index and opt_subexp flag.  */
+  close_node->token.opr.idx = node->token.opr.idx;
+  open_node->token.opr.idx = close_node->token.opr.idx;
+  close_node->token.opt_subexp = node->token.opt_subexp;
+  open_node->token.opt_subexp = close_node->token.opt_subexp;
+  return result;
+}
+
+/* Pass 1 in building the NFA: compute FIRST and create unlinked automaton
+   nodes.  Requires a postorder visit.
+
+   A CONCAT node takes FIRST and the node index from its left child;
+   any other node is its own FIRST and gets a new automaton node.
+   Returns REG_ESPACE if that node cannot be added, else REG_NOERROR.  */
+static reg_errcode_t
+calc_first (void *extra, bin_tree_t *node)
+{
+  re_dfa_t *dfa = (re_dfa_t *) extra;
+
+  if (node->token.type != CONCAT)
+    {
+      node->first = node;
+      node->node_idx = re_dfa_add_node (dfa, node->token);
+      return BE (node->node_idx == -1, 0) ? REG_ESPACE : REG_NOERROR;
+    }
+
+  node->first = node->left->first;
+  node->node_idx = node->left->node_idx;
+  return REG_NOERROR;
+}
+
+/* Pass 2: compute NEXT on the tree.  Preorder visit.
+
+   Sets the NEXT pointer of NODE's children from NODE's own NEXT.
+   EXTRA is unused.  Always returns REG_NOERROR.  */
+static reg_errcode_t
+calc_next (void *extra, bin_tree_t *node)
+{
+  bin_tree_t *lhs = node->left;
+  bin_tree_t *rhs = node->right;
+
+  if (node->token.type == OP_DUP_ASTERISK)
+    {
+      /* The repeated body loops back to the star itself.  */
+      lhs->next = node;
+    }
+  else if (node->token.type == CONCAT)
+    {
+      /* The left part is followed by the start of the right part,
+	 which in turn is followed by whatever follows the CONCAT.  */
+      lhs->next = rhs->first;
+      rhs->next = node->next;
+    }
+  else
+    {
+      if (lhs != NULL)
+	lhs->next = node->next;
+      if (rhs != NULL)
+	rhs->next = node->next;
+    }
+  return REG_NOERROR;
+}
+
+/* Pass 3: link all DFA nodes to their NEXT node (any order will do).
+
+   EXTRA is the re_dfa_t being built.  Depending on the type of NODE
+   this fills in the epsilon destinations (dfa->edests) and/or the
+   successor (dfa->nexts) of the automaton node NODE->node_idx.
+   Returns REG_NOERROR, or the error code of the node-set
+   initialization that failed.  */
+static reg_errcode_t
+link_nfa_nodes (void *extra, bin_tree_t *node)
+{
+  re_dfa_t *dfa = (re_dfa_t *) extra;
+  int idx = node->node_idx;
+  reg_errcode_t err = REG_NOERROR;
+
+  switch (node->token.type)
+    {
+    case CONCAT:
+      break;
+
+    case END_OF_RE:
+      assert (node->next == NULL);
+      break;
+
+    case OP_DUP_ASTERISK:
+    case OP_ALT:
+      {
+	/* Two epsilon destinations; a missing child falls through to
+	   whatever follows this node.  */
+	int left, right;
+	dfa->has_plural_match = 1;
+	if (node->left != NULL)
+	  left = node->left->first->node_idx;
+	else
+	  left = node->next->node_idx;
+	if (node->right != NULL)
+	  right = node->right->first->node_idx;
+	else
+	  right = node->next->node_idx;
+	assert (left > -1);
+	assert (right > -1);
+	err = re_node_set_init_2 (dfa->edests + idx, left, right);
+      }
+      break;
+
+    case ANCHOR:
+    case OP_OPEN_SUBEXP:
+    case OP_CLOSE_SUBEXP:
+      err = re_node_set_init_1 (dfa->edests + idx, node->next->node_idx);
+      break;
+
+    case OP_BACK_REF:
+      /* A back-reference gets both a regular successor and an epsilon
+	 destination.  Report an allocation failure like the other
+	 cases do instead of dropping it.  */
+      dfa->nexts[idx] = node->next->node_idx;
+      err = re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]);
+      break;
+
+    default:
+      assert (!IS_EPSILON_NODE (node->token.type));
+      dfa->nexts[idx] = node->next->node_idx;
+      break;
+    }
+
+  return err;
+}
+
+/* Duplicate the epsilon closure of the node ROOT_NODE.
+   Note that duplicated nodes have constraint INIT_CONSTRAINT in addition
+   to their own constraint.
+
+   The walk starts at TOP_ORG_NODE, whose clone is TOP_CLONE_NODE, and
+   follows epsilon destinations, creating a clone for each destination
+   and linking it into the edests of the previous clone.  ROOT_NODE is
+   used to detect that the walk has looped back to where it began.
+   Returns REG_NOERROR on success, REG_ESPACE when a node cannot be
+   duplicated or inserted, or the error of a nested call.  */
+
+static reg_errcode_t
+internal_function
+duplicate_node_closure (re_dfa_t *dfa, int top_org_node, int top_clone_node,
+			int root_node, unsigned int init_constraint)
+{
+  int org_node, clone_node, ret;
+  unsigned int constraint = init_constraint;
+  /* ORG_NODE walks the original nodes; CLONE_NODE is always the clone
+     that corresponds to ORG_NODE.  */
+  for (org_node = top_org_node, clone_node = top_clone_node;;)
+    {
+      int org_dest, clone_dest;
+      if (dfa->nodes[org_node].type == OP_BACK_REF)
+	{
+	  /* If the back reference epsilon-transit, its destination must
+	     also have the constraint.  Then duplicate the epsilon closure
+	     of the destination of the back reference, and store it in
+	     edests of the back reference.  */
+	  org_dest = dfa->nexts[org_node];
+	  re_node_set_empty (dfa->edests + clone_node);
+	  clone_dest = duplicate_node (dfa, org_dest, constraint);
+	  if (BE (clone_dest == -1, 0))
+	    return REG_ESPACE;
+	  dfa->nexts[clone_node] = dfa->nexts[org_node];
+	  ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+	  if (BE (ret < 0, 0))
+	    return REG_ESPACE;
+	}
+      else if (dfa->edests[org_node].nelem == 0)
+	{
+	  /* In case of the node can't epsilon-transit, don't duplicate the
+	     destination and store the original destination as the
+	     destination of the node.  */
+	  dfa->nexts[clone_node] = dfa->nexts[org_node];
+	  break;
+	}
+      else if (dfa->edests[org_node].nelem == 1)
+	{
+	  /* In case of the node can epsilon-transit, and it has only one
+	     destination.  */
+	  org_dest = dfa->edests[org_node].elems[0];
+	  re_node_set_empty (dfa->edests + clone_node);
+	  if (dfa->nodes[org_node].type == ANCHOR)
+	    {
+	      /* In case of the node has another constraint, append it.  */
+	      if (org_node == root_node && clone_node != org_node)
+		{
+		  /* ...but if the node is root_node itself, it means the
+		     epsilon closure have a loop, then tie it to the
+		     destination of the root_node.  */
+		  ret = re_node_set_insert (dfa->edests + clone_node,
+					    org_dest);
+		  if (BE (ret < 0, 0))
+		    return REG_ESPACE;
+		  break;
+		}
+	      constraint |= dfa->nodes[org_node].opr.ctx_type;
+	    }
+	  clone_dest = duplicate_node (dfa, org_dest, constraint);
+	  if (BE (clone_dest == -1, 0))
+	    return REG_ESPACE;
+	  ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+	  if (BE (ret < 0, 0))
+	    return REG_ESPACE;
+	}
+      else /* dfa->edests[org_node].nelem == 2 */
+	{
+	  /* In case of the node can epsilon-transit, and it has two
+	     destinations. In the bin_tree_t and DFA, that's '|' and '*'.   */
+	  org_dest = dfa->edests[org_node].elems[0];
+	  re_node_set_empty (dfa->edests + clone_node);
+	  /* Search for a duplicated node which satisfies the constraint.  */
+	  clone_dest = search_duplicated_node (dfa, org_dest, constraint);
+	  if (clone_dest == -1)
+	    {
+	      /* There are no such a duplicated node, create a new one.  */
+	      reg_errcode_t err;
+	      clone_dest = duplicate_node (dfa, org_dest, constraint);
+	      if (BE (clone_dest == -1, 0))
+		return REG_ESPACE;
+	      ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+	      if (BE (ret < 0, 0))
+		return REG_ESPACE;
+	      /* The first destination is handled by a nested call; the
+		 loop itself continues with the second one below.  */
+	      err = duplicate_node_closure (dfa, org_dest, clone_dest,
+					    root_node, constraint);
+	      if (BE (err != REG_NOERROR, 0))
+		return err;
+	    }
+	  else
+	    {
+	      /* There are a duplicated node which satisfy the constraint,
+		 use it to avoid infinite loop.  */
+	      ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+	      if (BE (ret < 0, 0))
+		return REG_ESPACE;
+	    }
+
+	  org_dest = dfa->edests[org_node].elems[1];
+	  clone_dest = duplicate_node (dfa, org_dest, constraint);
+	  if (BE (clone_dest == -1, 0))
+	    return REG_ESPACE;
+	  ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+	  if (BE (ret < 0, 0))
+	    return REG_ESPACE;
+	}
+      /* Step to the destination just processed and its clone.  */
+      org_node = org_dest;
+      clone_node = clone_dest;
+    }
+  return REG_NOERROR;
+}
+
+/* Search for a node which is duplicated from the node ORG_NODE, and
+   satisfies the constraint CONSTRAINT.
+
+   The search runs backwards from the last node for as long as the
+   nodes are marked as duplicated, and never examines node 0.
+   Returns the index of the matching node, or -1 if there is none.  */
+
+static int
+search_duplicated_node (const re_dfa_t *dfa, int org_node,
+			unsigned int constraint)
+{
+  int idx;
+  /* Test the index before it is used to read dfa->nodes, so that an
+     empty node array is never indexed.  */
+  for (idx = dfa->nodes_len - 1; idx > 0 && dfa->nodes[idx].duplicated; --idx)
+    {
+      if (org_node == dfa->org_indices[idx]
+	  && constraint == dfa->nodes[idx].constraint)
+	return idx; /* Found.  */
+    }
+  return -1; /* Not found.  */
+}
+
+/* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT.
+   Return the index of the new node, or -1 if insufficient storage is
+   available.
+
+   When the original is an ANCHOR, its own context type is added to the
+   constraint of the copy.  The copy is marked as duplicated, and the
+   index of the original is recorded in DFA->org_indices.  */
+
+static int
+duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint)
+{
+  unsigned int merged = constraint;
+  int dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]);
+
+  if (BE (dup_idx == -1, 0))
+    return dup_idx;
+
+  if (dfa->nodes[org_idx].type == ANCHOR)
+    merged |= dfa->nodes[org_idx].opr.ctx_type;
+  dfa->nodes[dup_idx].constraint = merged;
+  dfa->nodes[dup_idx].duplicated = 1;
+
+  /* Store the index of the original node.  */
+  dfa->org_indices[dup_idx] = org_idx;
+  return dup_idx;
+}
+
+/* Calculate "inveclosure" for all the nodes in DFA: node SRC is added
+   to the inverse closure of every node that appears in the epsilon
+   closure of SRC.  Returns REG_ESPACE if an insertion fails, else
+   REG_NOERROR.  */
+static reg_errcode_t
+calc_inveclosure (re_dfa_t *dfa)
+{
+  int src, k, ret;
+
+  /* Start from empty sets for every node.  */
+  for (k = 0; k < dfa->nodes_len; ++k)
+    re_node_set_init_empty (&dfa->inveclosures[k]);
+
+  for (src = 0; src < dfa->nodes_len; ++src)
+    {
+      const re_node_set *closure = &dfa->eclosures[src];
+
+      for (k = 0; k < closure->nelem; ++k)
+	{
+	  re_node_set *target = &dfa->inveclosures[closure->elems[k]];
+
+	  ret = re_node_set_insert_last (target, src);
+	  if (BE (ret == -1, 0))
+	    return REG_ESPACE;
+	}
+    }
+
+  return REG_NOERROR;
+}
+
+/* Calculate "eclosure" for all the nodes in DFA.
+
+   The nodes are scanned repeatedly: whenever a scan leaves at least
+   one closure uncomputed, another scan is started from node 0.
+   Returns REG_NOERROR on success, or the error code reported by
+   calc_eclosure_iter.  */
+
+static reg_errcode_t
+calc_eclosure (re_dfa_t *dfa)
+{
+  int node_idx, incomplete;
+#ifdef DEBUG
+  assert (dfa->nodes_len > 0);
+#endif
+  incomplete = 0;
+  /* For each nodes, calculate epsilon closure.  */
+  for (node_idx = 0; ; ++node_idx)
+    {
+      reg_errcode_t err;
+      re_node_set eclosure_elem;
+      if (node_idx == dfa->nodes_len)
+	{
+	  /* End of a scan: finish if every closure was computed,
+	     otherwise start over from the first node.  */
+	  if (!incomplete)
+	    break;
+	  incomplete = 0;
+	  node_idx = 0;
+	}
+
+#ifdef DEBUG
+      assert (dfa->eclosures[node_idx].nelem != -1);
+#endif
+
+      /* If we have already calculated, skip it.  */
+      if (dfa->eclosures[node_idx].nelem != 0)
+	continue;
+      /* Calculate epsilon closure of `node_idx'.  */
+      err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, 1);
+      if (BE (err != REG_NOERROR, 0))
+	return err;
+
+      /* A closure that is still empty was not stored in DFA, so the
+	 returned set is released and another scan is requested.  */
+      if (dfa->eclosures[node_idx].nelem == 0)
+	{
+	  incomplete = 1;
+	  re_node_set_free (&eclosure_elem);
+	}
+    }
+  return REG_NOERROR;
+}
+
+/* Calculate epsilon closure of NODE.
+
+   The closure is returned in *NEW_SET.  It is also stored in
+   DFA->eclosures[NODE], unless it is incomplete (it depends on a
+   closure that is still being computed) and ROOT is zero; in that case
+   DFA->eclosures[NODE] is left empty and the caller owns *NEW_SET.
+
+   Returns REG_NOERROR on success.  On failure the partially built set
+   is released, *NEW_SET is not written, and the error code is
+   returned.  */
+
+static reg_errcode_t
+calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, int node, int root)
+{
+  reg_errcode_t err;
+  unsigned int constraint;
+  int i, incomplete;
+  re_node_set eclosure;
+  incomplete = 0;
+  err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1);
+  if (BE (err != REG_NOERROR, 0))
+    return err;
+
+  /* This indicates that we are calculating this node now.
+     We reference this value to avoid infinite loop.  */
+  dfa->eclosures[node].nelem = -1;
+
+  constraint = ((dfa->nodes[node].type == ANCHOR)
+		? dfa->nodes[node].opr.ctx_type : 0);
+  /* If the current node has constraints, duplicate all nodes.
+     Since they must inherit the constraints.  */
+  if (constraint
+      && dfa->edests[node].nelem
+      && !dfa->nodes[dfa->edests[node].elems[0]].duplicated)
+    {
+      err = duplicate_node_closure (dfa, node, node, node, constraint);
+      if (BE (err != REG_NOERROR, 0))
+	goto free_return;
+    }
+
+  /* Expand each epsilon destination nodes.  */
+  if (IS_EPSILON_NODE(dfa->nodes[node].type))
+    for (i = 0; i < dfa->edests[node].nelem; ++i)
+      {
+	re_node_set eclosure_elem;
+	int edest = dfa->edests[node].elems[i];
+	/* If calculating the epsilon closure of `edest' is in progress,
+	   return intermediate result.  */
+	if (dfa->eclosures[edest].nelem == -1)
+	  {
+	    incomplete = 1;
+	    continue;
+	  }
+	/* If we haven't calculated the epsilon closure of `edest' yet,
+	   calculate now. Otherwise use calculated epsilon closure.  */
+	if (dfa->eclosures[edest].nelem == 0)
+	  {
+	    err = calc_eclosure_iter (&eclosure_elem, dfa, edest, 0);
+	    if (BE (err != REG_NOERROR, 0))
+	      goto free_return;
+	  }
+	else
+	  eclosure_elem = dfa->eclosures[edest];
+	/* Merge the epsilon closure of `edest'.  */
+	err = re_node_set_merge (&eclosure, &eclosure_elem);
+	/* If the epsilon closure of `edest' is incomplete,
+	   the epsilon closure of this node is also incomplete.
+	   The set is then a temporary one owned here, so release it
+	   before acting on the result of the merge.  */
+	if (dfa->eclosures[edest].nelem == 0)
+	  {
+	    incomplete = 1;
+	    re_node_set_free (&eclosure_elem);
+	  }
+	if (BE (err != REG_NOERROR, 0))
+	  goto free_return;
+      }
+
+  /* Epsilon closures include itself.  */
+  if (BE (re_node_set_insert (&eclosure, node) < 0, 0))
+    {
+      err = REG_ESPACE;
+      goto free_return;
+    }
+  if (incomplete && !root)
+    dfa->eclosures[node].nelem = 0;
+  else
+    dfa->eclosures[node] = eclosure;
+  *new_set = eclosure;
+  return REG_NOERROR;
+
+ free_return:
+  /* ECLOSURE has not been stored anywhere yet, so it must be released
+     here.  */
+  re_node_set_free (&eclosure);
+  return err;
+}
+
+/* Functions for token which are used in the parser.  */
+
+/* Read the next token from INPUT into RESULT and advance INPUT past it.
+   This function must not be used inside bracket expressions.  */
+
+static void
+internal_function
+fetch_token (re_token_t *result, re_string_t *input, reg_syntax_t syntax)
+{
+  /* peek_token reports how many bytes the token occupies.  */
+  int token_len = peek_token (result, input, syntax);
+
+  re_string_skip_bytes (input, token_len);
+}
+
+/* Peek a token from INPUT, and return the length of the token.
+   We must not use this function inside bracket expressions.
+
+   TOKEN receives the token type, its operand (opr.c, opr.idx or
+   opr.ctx_type depending on the type) and the word_char flag (plus
+   mb_partial when RE_ENABLE_I18N is defined).  SYNTAX selects which
+   characters and backslash sequences act as operators.
+   The return value is 0 at end of input, 2 for a backslash sequence
+   and 1 otherwise.  The read position of INPUT is unchanged on return.  */
+
+static int
+internal_function
+peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
+{
+  unsigned char c;
+
+  if (re_string_eoi (input))
+    {
+      token->type = END_OF_RE;
+      return 0;
+    }
+
+  c = re_string_peek_byte (input, 0);
+  token->opr.c = c;
+
+  token->word_char = 0;
+#ifdef RE_ENABLE_I18N
+  token->mb_partial = 0;
+  /* In a multibyte locale, a byte for which re_string_first_byte is
+     false is returned as a plain CHARACTER flagged as mb_partial.  */
+  if (input->mb_cur_max > 1 &&
+      !re_string_first_byte (input, re_string_cur_idx (input)))
+    {
+      token->type = CHARACTER;
+      token->mb_partial = 1;
+      return 1;
+    }
+#endif
+  if (c == '\\')
+    {
+      unsigned char c2;
+      /* A backslash that is the last byte of the pattern.  */
+      if (re_string_cur_idx (input) + 1 >= re_string_length (input))
+	{
+	  token->type = BACK_SLASH;
+	  return 1;
+	}
+
+      /* Default: the escaped byte is an ordinary character.  The switch
+	 below upgrades it to an operator where SYNTAX allows.  */
+      c2 = re_string_peek_byte_case (input, 1);
+      token->opr.c = c2;
+      token->type = CHARACTER;
+#ifdef RE_ENABLE_I18N
+      if (input->mb_cur_max > 1)
+	{
+	  wint_t wc = re_string_wchar_at (input,
+					  re_string_cur_idx (input) + 1);
+	  token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
+	}
+      else
+#endif
+	token->word_char = IS_WORD_CHAR (c2) != 0;
+
+      switch (c2)
+	{
+	case '|':
+	  if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_NO_BK_VBAR))
+	    token->type = OP_ALT;
+	  break;
+	case '1': case '2': case '3': case '4': case '5':
+	case '6': case '7': case '8': case '9':
+	  if (!(syntax & RE_NO_BK_REFS))
+	    {
+	      token->type = OP_BACK_REF;
+	      /* Back-reference indices are stored zero-based.  */
+	      token->opr.idx = c2 - '1';
+	    }
+	  break;
+	case '<':
+	  if (!(syntax & RE_NO_GNU_OPS))
+	    {
+	      token->type = ANCHOR;
+	      token->opr.ctx_type = WORD_FIRST;
+	    }
+	  break;
+	case '>':
+	  if (!(syntax & RE_NO_GNU_OPS))
+	    {
+	      token->type = ANCHOR;
+	      token->opr.ctx_type = WORD_LAST;
+	    }
+	  break;
+	case 'b':
+	  if (!(syntax & RE_NO_GNU_OPS))
+	    {
+	      token->type = ANCHOR;
+	      token->opr.ctx_type = WORD_DELIM;
+	    }
+	  break;
+	case 'B':
+	  if (!(syntax & RE_NO_GNU_OPS))
+	    {
+	      token->type = ANCHOR;
+	      token->opr.ctx_type = NOT_WORD_DELIM;
+	    }
+	  break;
+	case 'w':
+	  if (!(syntax & RE_NO_GNU_OPS))
+	    token->type = OP_WORD;
+	  break;
+	case 'W':
+	  if (!(syntax & RE_NO_GNU_OPS))
+	    token->type = OP_NOTWORD;
+	  break;
+	case 's':
+	  if (!(syntax & RE_NO_GNU_OPS))
+	    token->type = OP_SPACE;
+	  break;
+	case 'S':
+	  if (!(syntax & RE_NO_GNU_OPS))
+	    token->type = OP_NOTSPACE;
+	  break;
+	case '`':
+	  if (!(syntax & RE_NO_GNU_OPS))
+	    {
+	      token->type = ANCHOR;
+	      token->opr.ctx_type = BUF_FIRST;
+	    }
+	  break;
+	case '\'':
+	  if (!(syntax & RE_NO_GNU_OPS))
+	    {
+	      token->type = ANCHOR;
+	      token->opr.ctx_type = BUF_LAST;
+	    }
+	  break;
+	case '(':
+	  if (!(syntax & RE_NO_BK_PARENS))
+	    token->type = OP_OPEN_SUBEXP;
+	  break;
+	case ')':
+	  if (!(syntax & RE_NO_BK_PARENS))
+	    token->type = OP_CLOSE_SUBEXP;
+	  break;
+	case '+':
+	  if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
+	    token->type = OP_DUP_PLUS;
+	  break;
+	case '?':
+	  if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
+	    token->type = OP_DUP_QUESTION;
+	  break;
+	case '{':
+	  if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
+	    token->type = OP_OPEN_DUP_NUM;
+	  break;
+	case '}':
+	  if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
+	    token->type = OP_CLOSE_DUP_NUM;
+	  break;
+	default:
+	  break;
+	}
+      /* The token spans the backslash and the byte after it.  */
+      return 2;
+    }
+
+  /* Not a backslash sequence: an ordinary character unless the switch
+     below recognizes it as an operator under SYNTAX.  */
+  token->type = CHARACTER;
+#ifdef RE_ENABLE_I18N
+  if (input->mb_cur_max > 1)
+    {
+      wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input));
+      token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
+    }
+  else
+#endif
+    token->word_char = IS_WORD_CHAR (token->opr.c);
+
+  switch (c)
+    {
+    case '\n':
+      if (syntax & RE_NEWLINE_ALT)
+	token->type = OP_ALT;
+      break;
+    case '|':
+      if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_NO_BK_VBAR))
+	token->type = OP_ALT;
+      break;
+    case '*':
+      token->type = OP_DUP_ASTERISK;
+      break;
+    case '+':
+      if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
+	token->type = OP_DUP_PLUS;
+      break;
+    case '?':
+      if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
+	token->type = OP_DUP_QUESTION;
+      break;
+    case '{':
+      if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
+	token->type = OP_OPEN_DUP_NUM;
+      break;
+    case '}':
+      if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
+	token->type = OP_CLOSE_DUP_NUM;
+      break;
+    case '(':
+      if (syntax & RE_NO_BK_PARENS)
+	token->type = OP_OPEN_SUBEXP;
+      break;
+    case ')':
+      if (syntax & RE_NO_BK_PARENS)
+	token->type = OP_CLOSE_SUBEXP;
+      break;
+    case '[':
+      token->type = OP_OPEN_BRACKET;
+      break;
+    case '.':
+      token->type = OP_PERIOD;
+      break;
+    case '^':
+      /* Away from the start of the pattern, and without a syntax bit
+	 forcing anchors, `^' is an anchor only right after a newline
+	 when RE_NEWLINE_ALT is set; otherwise it stays a CHARACTER.  */
+      if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS | RE_CARET_ANCHORS_HERE)) &&
+	  re_string_cur_idx (input) != 0)
+	{
+	  char prev = re_string_peek_byte (input, -1);
+	  if (!(syntax & RE_NEWLINE_ALT) || prev != '\n')
+	    break;
+	}
+      token->type = ANCHOR;
+      token->opr.ctx_type = LINE_FIRST;
+      break;
+    case '$':
+      /* Unless anchors are context independent, a `$' that is not the
+	 last byte is an anchor only when the next token is an
+	 alternation or a closing parenthesis.  */
+      if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) &&
+	  re_string_cur_idx (input) + 1 != re_string_length (input))
+	{
+	  re_token_t next;
+	  /* Step over the `$' temporarily to look at what follows,
+	     then restore the read position.  */
+	  re_string_skip_bytes (input, 1);
+	  peek_token (&next, input, syntax);
+	  re_string_skip_bytes (input, -1);
+	  if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP)
+	    break;
+	}
+      token->type = ANCHOR;
+      token->opr.ctx_type = LINE_LAST;
+      break;
+    default:
+      break;
+    }
+  return 1;
+}
+
+/* Bracket-expression counterpart of peek_token: classify the token at
+   the current position of INPUT, store it in TOKEN and return its
+   length in bytes (0 at end of input).
+   This function must only be used inside bracket expressions.
+   Note that for an escaped character (RE_BACKSLASH_ESCAPE_IN_LISTS)
+   the backslash itself is skipped here, so INPUT is advanced by one
+   byte and the returned length covers only the escaped byte.  */
+
+static int
+internal_function
+peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
+{
+  unsigned char ch;
+
+  if (re_string_eoi (input))
+    {
+      token->type = END_OF_RE;
+      return 0;
+    }
+
+  ch = re_string_peek_byte (input, 0);
+  token->opr.c = ch;
+
+#ifdef RE_ENABLE_I18N
+  /* A byte that is not the first byte of a character is always an
+     ordinary character.  */
+  if (input->mb_cur_max > 1 &&
+      !re_string_first_byte (input, re_string_cur_idx (input)))
+    {
+      token->type = CHARACTER;
+      return 1;
+    }
+#endif /* RE_ENABLE_I18N */
+
+  if (ch == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS)
+      && re_string_cur_idx (input) + 1 < re_string_length (input))
+    {
+      /* The backslash quotes the byte that follows it.  */
+      unsigned char escaped;
+      re_string_skip_bytes (input, 1);
+      escaped = re_string_peek_byte (input, 0);
+      token->opr.c = escaped;
+      token->type = CHARACTER;
+      return 1;
+    }
+
+  if (ch == '[')
+    {
+      /* `[' may open "[.", "[=" or "[:"; look at the next byte, using
+	 0 when there is none.  */
+      unsigned char next = 0;
+      if (re_string_cur_idx (input) + 1 < re_string_length (input))
+	next = re_string_peek_byte (input, 1);
+      token->opr.c = next;
+
+      if (next == '.')
+	token->type = OP_OPEN_COLL_ELEM;
+      else if (next == '=')
+	token->type = OP_OPEN_EQUIV_CLASS;
+      else if (next == ':' && (syntax & RE_CHAR_CLASSES))
+	token->type = OP_OPEN_CHAR_CLASS;
+      else
+	{
+	  /* Just a literal `['.  */
+	  token->type = CHARACTER;
+	  token->opr.c = ch;
+	  return 1;
+	}
+      return 2;
+    }
+
+  if (ch == '-')
+    token->type = OP_CHARSET_RANGE;
+  else if (ch == ']')
+    token->type = OP_CLOSE_BRACKET;
+  else if (ch == '^')
+    token->type = OP_NON_MATCH_LIST;
+  else
+    token->type = CHARACTER;
+  return 1;
+}
+
+/* Functions for parser.  */
+
+/* Entry point of the parser.
+   Parse the regular expression REGEXP and return the structure tree.
+   If an error occurs, *ERR is set to the error code and NULL is
+   returned.  For a regular expression <reg_exp> the resulting tree is:
+	   CAT
+	   / \
+	  /   \
+   <reg_exp>  EOR
+
+   CAT means concatenation.
+   EOR means end of regular expression.
+   When <reg_exp> is empty the tree consists of the EOR node alone.  */
+
+static bin_tree_t *
+parse (re_string_t *regexp, regex_t *preg, reg_syntax_t syntax,
+       reg_errcode_t *err)
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  bin_tree_t *body, *end_node, *top;
+  re_token_t tok;
+
+  dfa->syntax = syntax;
+
+  /* The first token is fetched with RE_CARET_ANCHORS_HERE added to the
+     syntax bits.  */
+  fetch_token (&tok, regexp, syntax | RE_CARET_ANCHORS_HERE);
+  body = parse_reg_exp (regexp, preg, &tok, syntax, 0, err);
+  if (BE (body == NULL && *err != REG_NOERROR, 0))
+    return NULL;
+
+  end_node = create_tree (dfa, NULL, NULL, END_OF_RE);
+  top = (body == NULL) ? end_node
+		       : create_tree (dfa, body, end_node, CONCAT);
+  if (BE (end_node == NULL || top == NULL, 0))
+    {
+      *err = REG_ESPACE;
+      return NULL;
+    }
+  return top;
+}
+
+/* This function builds the following tree from the regular expression
+   <branch1>|<branch2>:
+	   ALT
+	   / \
+	  /   \
+   <branch1> <branch2>
+
+   ALT means alternative, which represents the operator `|'.
+   An empty alternative is represented by a NULL child.
+   On failure *ERR is set, NULL is returned, and the heap data owned by
+   the tokens of the already-built tree is released with free_tree.  */
+
+static bin_tree_t *
+parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
+	       reg_syntax_t syntax, int nest, reg_errcode_t *err)
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  bin_tree_t *tree, *branch = NULL;
+  tree = parse_branch (regexp, preg, token, syntax, nest, err);
+  if (BE (*err != REG_NOERROR && tree == NULL, 0))
+    return NULL;
+
+  while (token->type == OP_ALT)
+    {
+      bin_tree_t *alt;
+      fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE);
+      if (token->type != OP_ALT && token->type != END_OF_RE
+	  && (nest == 0 || token->type != OP_CLOSE_SUBEXP))
+	{
+	  branch = parse_branch (regexp, preg, token, syntax, nest, err);
+	  if (BE (*err != REG_NOERROR && branch == NULL, 0))
+	    {
+	      /* Do not leak what the earlier alternatives own.  */
+	      if (tree != NULL)
+		postorder (tree, free_tree, NULL);
+	      return NULL;
+	    }
+	}
+      else
+	branch = NULL;
+      alt = create_tree (dfa, tree, branch, OP_ALT);
+      if (BE (alt == NULL, 0))
+	{
+	  /* Either operand may be NULL (an empty alternative).  */
+	  if (branch != NULL)
+	    postorder (branch, free_tree, NULL);
+	  if (tree != NULL)
+	    postorder (tree, free_tree, NULL);
+	  *err = REG_ESPACE;
+	  return NULL;
+	}
+      tree = alt;
+    }
+  return tree;
+}
+
+/* This function builds the following tree from the regular expression
+   <exp1><exp2>:
+	CAT
+	/ \
+       /   \
+   <exp1> <exp2>
+
+   CAT means concatenation.
+   Parsing stops at `|', at the end of the pattern, or (when NEST is
+   non-zero) at a closing parenthesis.
+   On failure *ERR is set, NULL is returned, and the heap data owned by
+   the tokens of the already-built tree is released with free_tree.  */
+
+static bin_tree_t *
+parse_branch (re_string_t *regexp, regex_t *preg, re_token_t *token,
+	      reg_syntax_t syntax, int nest, reg_errcode_t *err)
+{
+  bin_tree_t *tree, *exp;
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  tree = parse_expression (regexp, preg, token, syntax, nest, err);
+  if (BE (*err != REG_NOERROR && tree == NULL, 0))
+    return NULL;
+
+  while (token->type != OP_ALT && token->type != END_OF_RE
+	 && (nest == 0 || token->type != OP_CLOSE_SUBEXP))
+    {
+      exp = parse_expression (regexp, preg, token, syntax, nest, err);
+      if (BE (*err != REG_NOERROR && exp == NULL, 0))
+	{
+	  /* Do not leak what the earlier expressions own.  */
+	  if (tree != NULL)
+	    postorder (tree, free_tree, NULL);
+	  return NULL;
+	}
+      if (tree != NULL && exp != NULL)
+	{
+	  bin_tree_t *cat = create_tree (dfa, tree, exp, CONCAT);
+	  if (cat == NULL)
+	    {
+	      postorder (exp, free_tree, NULL);
+	      postorder (tree, free_tree, NULL);
+	      *err = REG_ESPACE;
+	      return NULL;
+	    }
+	  tree = cat;
+	}
+      else if (tree == NULL)
+	tree = exp;
+      /* Otherwise exp == NULL, we don't need to create new tree.  */
+    }
+  return tree;
+}
+
+/* This function builds the following tree from the regular expression a*:
+	 *
+	 |
+	 a
+
+   It parses one atom (character, subexpression, bracket expression,
+   back reference, anchor, `.', or a class such as \w or \s) and then
+   any repetition operators that follow it.
+   Return the tree, or NULL either for an empty expression (*ERR left
+   untouched) or on failure (*ERR set).  When a repetition operator
+   fails, the heap data owned by the atom's tokens is released.  */
+
+static bin_tree_t *
+parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
+		  reg_syntax_t syntax, int nest, reg_errcode_t *err)
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  bin_tree_t *tree;
+  switch (token->type)
+    {
+    case CHARACTER:
+      tree = create_token_tree (dfa, NULL, NULL, token);
+      if (BE (tree == NULL, 0))
+	{
+	  *err = REG_ESPACE;
+	  return NULL;
+	}
+#ifdef RE_ENABLE_I18N
+      if (dfa->mb_cur_max > 1)
+	{
+	  /* Chain the remaining bytes of a multibyte character onto the
+	     first one with CONCAT nodes.  */
+	  while (!re_string_eoi (regexp)
+		 && !re_string_first_byte (regexp, re_string_cur_idx (regexp)))
+	    {
+	      bin_tree_t *mbc_remain;
+	      fetch_token (token, regexp, syntax);
+	      mbc_remain = create_token_tree (dfa, NULL, NULL, token);
+	      tree = create_tree (dfa, tree, mbc_remain, CONCAT);
+	      if (BE (mbc_remain == NULL || tree == NULL, 0))
+		{
+		  *err = REG_ESPACE;
+		  return NULL;
+		}
+	    }
+	}
+#endif
+      break;
+    case OP_OPEN_SUBEXP:
+      tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err);
+      if (BE (*err != REG_NOERROR && tree == NULL, 0))
+	return NULL;
+      break;
+    case OP_OPEN_BRACKET:
+      tree = parse_bracket_exp (regexp, dfa, token, syntax, err);
+      if (BE (*err != REG_NOERROR && tree == NULL, 0))
+	return NULL;
+      break;
+    case OP_BACK_REF:
+      /* A back reference may only name a group that is already closed.  */
+      if (!BE (dfa->completed_bkref_map & (1 << token->opr.idx), 1))
+	{
+	  *err = REG_ESUBREG;
+	  return NULL;
+	}
+      dfa->used_bkref_map |= 1 << token->opr.idx;
+      tree = create_token_tree (dfa, NULL, NULL, token);
+      if (BE (tree == NULL, 0))
+	{
+	  *err = REG_ESPACE;
+	  return NULL;
+	}
+      ++dfa->nbackref;
+      dfa->has_mb_node = 1;
+      break;
+    case OP_OPEN_DUP_NUM:
+      if (syntax & RE_CONTEXT_INVALID_DUP)
+	{
+	  *err = REG_BADRPT;
+	  return NULL;
+	}
+      /* FALLTHROUGH */
+    case OP_DUP_ASTERISK:
+    case OP_DUP_PLUS:
+    case OP_DUP_QUESTION:
+      /* A repetition operator with nothing to repeat.  */
+      if (syntax & RE_CONTEXT_INVALID_OPS)
+	{
+	  *err = REG_BADRPT;
+	  return NULL;
+	}
+      else if (syntax & RE_CONTEXT_INDEP_OPS)
+	{
+	  /* Skip the operator and parse what follows it.  */
+	  fetch_token (token, regexp, syntax);
+	  return parse_expression (regexp, preg, token, syntax, nest, err);
+	}
+      /* else fall through  */
+    case OP_CLOSE_SUBEXP:
+      if ((token->type == OP_CLOSE_SUBEXP) &&
+	  !(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD))
+	{
+	  *err = REG_ERPAREN;
+	  return NULL;
+	}
+      /* else fall through  */
+    case OP_CLOSE_DUP_NUM:
+      /* We treat these tokens as normal characters.  */
+      token->type = CHARACTER;
+      /* mb_partial and word_char bits should be initialized already
+	 by peek_token.  */
+      tree = create_token_tree (dfa, NULL, NULL, token);
+      if (BE (tree == NULL, 0))
+	{
+	  *err = REG_ESPACE;
+	  return NULL;
+	}
+      break;
+    case ANCHOR:
+      if ((token->opr.ctx_type
+	   & (WORD_DELIM | NOT_WORD_DELIM | WORD_FIRST | WORD_LAST))
+	  && dfa->word_ops_used == 0)
+	init_word_char (dfa);
+      if (token->opr.ctx_type == WORD_DELIM
+	  || token->opr.ctx_type == NOT_WORD_DELIM)
+	{
+	  /* \b and \B are each expanded into an alternation of two
+	     simpler anchors.  */
+	  bin_tree_t *tree_first, *tree_last;
+	  if (token->opr.ctx_type == WORD_DELIM)
+	    {
+	      token->opr.ctx_type = WORD_FIRST;
+	      tree_first = create_token_tree (dfa, NULL, NULL, token);
+	      token->opr.ctx_type = WORD_LAST;
+	    }
+	  else
+	    {
+	      token->opr.ctx_type = INSIDE_WORD;
+	      tree_first = create_token_tree (dfa, NULL, NULL, token);
+	      token->opr.ctx_type = INSIDE_NOTWORD;
+	    }
+	  tree_last = create_token_tree (dfa, NULL, NULL, token);
+	  tree = create_tree (dfa, tree_first, tree_last, OP_ALT);
+	  if (BE (tree_first == NULL || tree_last == NULL || tree == NULL, 0))
+	    {
+	      *err = REG_ESPACE;
+	      return NULL;
+	    }
+	}
+      else
+	{
+	  tree = create_token_tree (dfa, NULL, NULL, token);
+	  if (BE (tree == NULL, 0))
+	    {
+	      *err = REG_ESPACE;
+	      return NULL;
+	    }
+	}
+      /* We must return here, since ANCHORs can't be followed
+	 by repetition operators.
+	 eg. RE"^*" is invalid or "<ANCHOR(^)><CHAR(*)>",
+	     it must not be "<ANCHOR(^)><REPEAT(*)>".  */
+      fetch_token (token, regexp, syntax);
+      return tree;
+    case OP_PERIOD:
+      tree = create_token_tree (dfa, NULL, NULL, token);
+      if (BE (tree == NULL, 0))
+	{
+	  *err = REG_ESPACE;
+	  return NULL;
+	}
+      if (dfa->mb_cur_max > 1)
+	dfa->has_mb_node = 1;
+      break;
+    case OP_WORD:
+    case OP_NOTWORD:
+      tree = build_charclass_op (dfa, regexp->trans,
+				 (const unsigned char *) "alnum",
+				 (const unsigned char *) "_",
+				 token->type == OP_NOTWORD, err);
+      if (BE (*err != REG_NOERROR && tree == NULL, 0))
+	return NULL;
+      break;
+    case OP_SPACE:
+    case OP_NOTSPACE:
+      tree = build_charclass_op (dfa, regexp->trans,
+				 (const unsigned char *) "space",
+				 (const unsigned char *) "",
+				 token->type == OP_NOTSPACE, err);
+      if (BE (*err != REG_NOERROR && tree == NULL, 0))
+	return NULL;
+      break;
+    case OP_ALT:
+    case END_OF_RE:
+      /* Empty expression; not an error.  */
+      return NULL;
+    case BACK_SLASH:
+      *err = REG_EESCAPE;
+      return NULL;
+    default:
+      /* Must not happen?  */
+#ifdef DEBUG
+      assert (0);
+#endif
+      return NULL;
+    }
+  fetch_token (token, regexp, syntax);
+
+  while (token->type == OP_DUP_ASTERISK || token->type == OP_DUP_PLUS
+	 || token->type == OP_DUP_QUESTION || token->type == OP_OPEN_DUP_NUM)
+    {
+      bin_tree_t *dup_tree = parse_dup_op (tree, regexp, dfa, token,
+					   syntax, err);
+      if (BE (*err != REG_NOERROR && dup_tree == NULL, 0))
+	{
+	  /* The operand is abandoned; release what its tokens own.  */
+	  if (tree != NULL)
+	    postorder (tree, free_tree, NULL);
+	  return NULL;
+	}
+      tree = dup_tree;
+      /* In BRE consecutive duplications are not allowed.  */
+      if ((syntax & RE_CONTEXT_INVALID_DUP)
+	  && (token->type == OP_DUP_ASTERISK
+	      || token->type == OP_OPEN_DUP_NUM))
+	{
+	  if (tree != NULL)
+	    postorder (tree, free_tree, NULL);
+	  *err = REG_BADRPT;
+	  return NULL;
+	}
+    }
+
+  return tree;
+}
+
+/* This function builds the following tree from the regular expression
+   (<reg_exp>):
+	 SUBEXP
+	    |
+	<reg_exp>
+
+   TOKEN is the opening parenthesis on entry.  The group takes the next
+   subexpression index from PREG->re_nsub, which is incremented.
+   On failure *ERR is set (REG_EPAREN when the closing parenthesis is
+   missing), NULL is returned, and the heap data owned by the tokens of
+   the parsed body is released with free_tree.  */
+
+static bin_tree_t *
+parse_sub_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
+	       reg_syntax_t syntax, int nest, reg_errcode_t *err)
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  bin_tree_t *tree, *subexp;
+  size_t cur_nsub;
+  cur_nsub = preg->re_nsub++;
+
+  fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE);
+
+  /* The subexpression may be a null string.  */
+  if (token->type == OP_CLOSE_SUBEXP)
+    tree = NULL;
+  else
+    {
+      tree = parse_reg_exp (regexp, preg, token, syntax, nest, err);
+      if (BE (*err == REG_NOERROR && token->type != OP_CLOSE_SUBEXP, 0))
+	{
+	  /* The body parsed fine but the group is never closed.  */
+	  if (tree != NULL)
+	    postorder (tree, free_tree, NULL);
+	  *err = REG_EPAREN;
+	}
+      if (BE (*err != REG_NOERROR, 0))
+	return NULL;
+    }
+
+  /* Only groups 1..9 can be the target of a back reference.  */
+  if (cur_nsub <= '9' - '1')
+    dfa->completed_bkref_map |= 1 << cur_nsub;
+
+  subexp = create_tree (dfa, tree, NULL, SUBEXP);
+  if (BE (subexp == NULL, 0))
+    {
+      if (tree != NULL)
+	postorder (tree, free_tree, NULL);
+      *err = REG_ESPACE;
+      return NULL;
+    }
+  subexp->token.opr.idx = cur_nsub;
+  return subexp;
+}
+
+/* This function parses repetition operators like "*", "+", "{1,3}" etc.
+   ELEM is the operand tree and TOKEN the operator token on entry.
+   Return the tree for the repeated expression.  NULL is returned
+   without setting *ERR when ELEM is NULL or when the repetition is
+   "{0}" / "{0,0}" (ELEM is then released).  On failure *ERR is set and
+   NULL is returned.  With RE_INVALID_INTERVAL_ORD an invalid interval
+   is not an error: the input is rewound, the `{' token is turned into
+   a CHARACTER, and ELEM is returned unchanged.  */
+
+static bin_tree_t *
+parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
+	      re_token_t *token, reg_syntax_t syntax, reg_errcode_t *err)
+{
+  bin_tree_t *tree = NULL, *old_tree = NULL;
+  int i, start, end, start_idx = re_string_cur_idx (regexp);
+  re_token_t start_token = *token;
+
+  if (token->type == OP_OPEN_DUP_NUM)
+    {
+      end = 0;
+      start = fetch_number (regexp, token, syntax);
+      if (start == -1)
+	{
+	  if (token->type == CHARACTER && token->opr.c == ',')
+	    start = 0; /* We treat "{,m}" as "{0,m}".  */
+	  else
+	    {
+	      *err = REG_BADBR; /* <re>{} is invalid.  */
+	      return NULL;
+	    }
+	}
+      if (BE (start != -2, 1))
+	{
+	  /* We treat "{n}" as "{n,n}".  */
+	  end = ((token->type == OP_CLOSE_DUP_NUM) ? start
+		 : ((token->type == CHARACTER && token->opr.c == ',')
+		    ? fetch_number (regexp, token, syntax) : -2));
+	}
+      if (BE (start == -2 || end == -2, 0))
+	{
+	  /* Invalid sequence.  */
+	  if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0))
+	    {
+	      if (token->type == END_OF_RE)
+		*err = REG_EBRACE;
+	      else
+		*err = REG_BADBR;
+
+	      return NULL;
+	    }
+
+	  /* If the syntax bit is set, rollback.  */
+	  re_string_set_index (regexp, start_idx);
+	  *token = start_token;
+	  token->type = CHARACTER;
+	  /* mb_partial and word_char bits should be already initialized by
+	     peek_token.  */
+	  return elem;
+	}
+
+      if (BE (end != -1 && start > end, 0))
+	{
+	  /* First number greater than second.  */
+	  *err = REG_BADBR;
+	  return NULL;
+	}
+    }
+  else
+    {
+      /* "*" is {0,}, "+" is {1,} and "?" is {0,1}; END == -1 means
+	 there is no upper bound.  */
+      start = (token->type == OP_DUP_PLUS) ? 1 : 0;
+      end = (token->type == OP_DUP_QUESTION) ? 1 : -1;
+    }
+
+  fetch_token (token, regexp, syntax);
+
+  if (BE (elem == NULL, 0))
+    return NULL;
+  if (BE (start == 0 && end == 0, 0))
+    {
+      /* Zero repetitions: the operand vanishes altogether.  */
+      postorder (elem, free_tree, NULL);
+      return NULL;
+    }
+
+  /* Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}".  */
+  if (BE (start > 0, 0))
+    {
+      tree = elem;
+      for (i = 2; i <= start; ++i)
+	{
+	  elem = duplicate_tree (elem, dfa);
+	  tree = create_tree (dfa, tree, elem, CONCAT);
+	  if (BE (elem == NULL || tree == NULL, 0))
+	    goto parse_dup_op_espace;
+	}
+
+      if (start == end)
+	return tree;
+
+      /* Duplicate ELEM before it is marked optional.  The copy is
+	 dereferenced below, so a failed duplication must stop here.  */
+      elem = duplicate_tree (elem, dfa);
+      if (BE (elem == NULL, 0))
+	goto parse_dup_op_espace;
+      old_tree = tree;
+    }
+  else
+    old_tree = NULL;
+
+  if (elem->token.type == SUBEXP)
+    postorder (elem, mark_opt_subexp, (void *) (long) elem->token.opr.idx);
+
+  tree = create_tree (dfa, elem, NULL, (end == -1 ? OP_DUP_ASTERISK : OP_ALT));
+  if (BE (tree == NULL, 0))
+    goto parse_dup_op_espace;
+
+  /* This loop is actually executed only when end != -1,
+     to rewrite <re>{0,n} as (<re>(<re>...<re>?)?)?...  We have
+     already created the start+1-th copy.  */
+  for (i = start + 2; i <= end; ++i)
+    {
+      elem = duplicate_tree (elem, dfa);
+      tree = create_tree (dfa, tree, elem, CONCAT);
+      if (BE (elem == NULL || tree == NULL, 0))
+	goto parse_dup_op_espace;
+
+      tree = create_tree (dfa, tree, NULL, OP_ALT);
+      if (BE (tree == NULL, 0))
+	goto parse_dup_op_espace;
+    }
+
+  if (old_tree)
+    {
+      /* Put the mandatory copies in front of the optional part.  A
+	 NULL result here must be reported: returning it without an
+	 error would make the caller treat the expression as empty.  */
+      tree = create_tree (dfa, old_tree, tree, CONCAT);
+      if (BE (tree == NULL, 0))
+	goto parse_dup_op_espace;
+    }
+
+  return tree;
+
+ parse_dup_op_espace:
+  *err = REG_ESPACE;
+  return NULL;
+}
+
+/* Size of the names for collating symbol/equivalence_class/character_class.
+   I'm not sure, but maybe enough.  */
+#define BRACKET_NAME_BUF_SIZE 32
+
+#ifndef _LIBC
+  /* Local function for parse_bracket_exp only used in case of NOT _LIBC.
+     Build the range expression which starts from START_ELEM, and ends
+     at END_ELEM.  The results are written to MBCSET and SBCSET.
+     RANGE_ALLOC is the allocated size of mbcset->range_starts and
+     mbcset->range_ends; it is a pointer argument since we may
+     update it.
+     Return REG_NOERROR on success, REG_ERANGE for an invalid range
+     (class endpoints, or start ordered after end), REG_ECOLLATE for
+     endpoints that cannot be handled without libc support, and
+     REG_ESPACE when the range arrays cannot be enlarged.  */
+
+static reg_errcode_t
+internal_function
+# ifdef RE_ENABLE_I18N
+build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc,
+		 bracket_elem_t *start_elem, bracket_elem_t *end_elem)
+# else /* not RE_ENABLE_I18N */
+build_range_exp (bitset_t sbcset, bracket_elem_t *start_elem,
+		 bracket_elem_t *end_elem)
+# endif /* not RE_ENABLE_I18N */
+{
+  unsigned int start_ch, end_ch;
+  /* Equivalence Classes and Character Classes can't be a range start/end.  */
+  if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
+	  || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,
+	  0))
+    return REG_ERANGE;
+
+  /* We can handle no multi character collating elements without libc
+     support.  */
+  if (BE ((start_elem->type == COLL_SYM
+	   && strlen ((char *) start_elem->opr.name) > 1)
+	  || (end_elem->type == COLL_SYM
+	      && strlen ((char *) end_elem->opr.name) > 1), 0))
+    return REG_ECOLLATE;
+
+# ifdef RE_ENABLE_I18N
+  {
+    wchar_t wc;
+    wint_t start_wc;
+    wint_t end_wc;
+    /* Three one-character strings for wcscoll: the range start at
+       index 0, the candidate at index 2 and the range end at index 4,
+       each followed by a terminating L'\0'.  */
+    wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
+
+    start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch
+		: ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
+		   : 0));
+    end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch
+	      : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
+		 : 0));
+    start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM)
+		? __btowc (start_ch) : start_elem->opr.wch);
+    end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM)
+	      ? __btowc (end_ch) : end_elem->opr.wch);
+    if (start_wc == WEOF || end_wc == WEOF)
+      return REG_ECOLLATE;
+    cmp_buf[0] = start_wc;
+    cmp_buf[4] = end_wc;
+    if (wcscoll (cmp_buf, cmp_buf + 4) > 0)
+      return REG_ERANGE;
+
+    /* Got valid collation sequence values, add them as a new entry.
+       However, for !_LIBC we have no collation elements: if the
+       character set is single byte, the single byte character set
+       that we build below suffices.  parse_bracket_exp passes
+       no MBCSET if dfa->mb_cur_max == 1.  */
+    if (mbcset)
+      {
+	/* Check the space of the arrays.  */
+	if (BE (*range_alloc == mbcset->nranges, 0))
+	  {
+	    /* There is not enough space, need realloc.  */
+	    wchar_t *new_array_start, *new_array_end;
+	    int new_nranges;
+
+	    /* +1 in case of mbcset->nranges is 0.  */
+	    new_nranges = 2 * mbcset->nranges + 1;
+	    /* Use realloc since mbcset->range_starts and mbcset->range_ends
+	       are NULL if *range_alloc == 0.
+	       Each pointer is stored back as soon as its realloc
+	       succeeds: realloc may move the block, and MBCSET must
+	       never keep a stale pointer if the second call fails.  */
+	    new_array_start = re_realloc (mbcset->range_starts, wchar_t,
+					  new_nranges);
+	    if (BE (new_array_start == NULL, 0))
+	      return REG_ESPACE;
+	    mbcset->range_starts = new_array_start;
+
+	    new_array_end = re_realloc (mbcset->range_ends, wchar_t,
+					new_nranges);
+	    if (BE (new_array_end == NULL, 0))
+	      return REG_ESPACE;
+	    mbcset->range_ends = new_array_end;
+
+	    *range_alloc = new_nranges;
+	  }
+
+	mbcset->range_starts[mbcset->nranges] = start_wc;
+	mbcset->range_ends[mbcset->nranges++] = end_wc;
+      }
+
+    /* Build the table for single byte characters.  */
+    for (wc = 0; wc < SBC_MAX; ++wc)
+      {
+	cmp_buf[2] = wc;
+	if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
+	    && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
+	  bitset_set (sbcset, wc);
+      }
+  }
+# else /* not RE_ENABLE_I18N */
+  {
+    unsigned int ch;
+    start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch
+		: ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
+		   : 0));
+    end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch
+	      : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
+		 : 0));
+    if (start_ch > end_ch)
+      return REG_ERANGE;
+    /* Build the table for single byte characters.  */
+    for (ch = 0; ch < SBC_MAX; ++ch)
+      if (start_ch <= ch  && ch <= end_ch)
+	bitset_set (sbcset, ch);
+  }
+# endif /* not RE_ENABLE_I18N */
+  return REG_NOERROR;
+}
+#endif /* not _LIBC */
+
+#ifndef _LIBC
+/* Helper for parse_bracket_exp, compiled only when _LIBC is not defined.
+   Add the collating element called NAME to SBCSET.  Only names that are
+   exactly one byte long are accepted; any other length yields
+   REG_ECOLLATE.  With RE_ENABLE_I18N the signature also carries MBCSET
+   and COLL_SYM_ALLOC, but this implementation does not use them.  */
+
+static reg_errcode_t
+internal_function
+# ifdef RE_ENABLE_I18N
+build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
+			int *coll_sym_alloc, const unsigned char *name)
+# else /* not RE_ENABLE_I18N */
+build_collating_symbol (bitset_t sbcset, const unsigned char *name)
+# endif /* not RE_ENABLE_I18N */
+{
+  /* Reject anything that is not a single-byte name.  */
+  if (BE (strlen ((const char *) name) != 1, 0))
+    return REG_ECOLLATE;
+
+  bitset_set (sbcset, name[0]);
+  return REG_NOERROR;
+}
+#endif /* not _LIBC */
+
+/* This function parse bracket expression like "[abc]", "[a-c]",
+   "[[.a-a.]]" etc.  */
+
+static bin_tree_t *
+parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
+		   reg_syntax_t syntax, reg_errcode_t *err)
+{
+#ifdef _LIBC
+  const unsigned char *collseqmb;
+  const char *collseqwc;
+  uint32_t nrules;
+  int32_t table_size;
+  const int32_t *symb_table;
+  const unsigned char *extra;
+
+  /* Local function for parse_bracket_exp used in _LIBC environement.
+     Seek the collating symbol entry correspondings to NAME.
+     Return the index of the symbol in the SYMB_TABLE.  */
+
+  auto inline int32_t
+  __attribute ((always_inline))
+  seek_collating_symbol_entry (name, name_len)
+	 const unsigned char *name;
+	 size_t name_len;
+    {
+      int32_t hash = elem_hash ((const char *) name, name_len);
+      int32_t elem = hash % table_size;
+      if (symb_table[2 * elem] != 0)
+	{
+	  int32_t second = hash % (table_size - 2) + 1;
+
+	  do
+	    {
+	      /* First compare the hashing value.  */
+	      if (symb_table[2 * elem] == hash
+		  /* Compare the length of the name.  */
+		  && name_len == extra[symb_table[2 * elem + 1]]
+		  /* Compare the name.  */
+		  && memcmp (name, &extra[symb_table[2 * elem + 1] + 1],
+			     name_len) == 0)
+		{
+		  /* Yep, this is the entry.  */
+		  break;
+		}
+
+	      /* Next entry.  */
+	      elem += second;
+	    }
+	  while (symb_table[2 * elem] != 0);
+	}
+      return elem;
+    }
+
+  /* Local function for parse_bracket_exp used in _LIBC environement.
+     Look up the collation sequence value of BR_ELEM.
+     Return the value if succeeded, UINT_MAX otherwise.  */
+
+  auto inline unsigned int
+  __attribute ((always_inline))
+  lookup_collation_sequence_value (br_elem)
+	 bracket_elem_t *br_elem;
+    {
+      if (br_elem->type == SB_CHAR)
+	{
+	  /*
+	  if (MB_CUR_MAX == 1)
+	  */
+	  if (nrules == 0)
+	    return collseqmb[br_elem->opr.ch];
+	  else
+	    {
+	      wint_t wc = __btowc (br_elem->opr.ch);
+	      return __collseq_table_lookup (collseqwc, wc);
+	    }
+	}
+      else if (br_elem->type == MB_CHAR)
+	{
+	  return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
+	}
+      else if (br_elem->type == COLL_SYM)
+	{
+	  size_t sym_name_len = strlen ((char *) br_elem->opr.name);
+	  if (nrules != 0)
+	    {
+	      int32_t elem, idx;
+	      elem = seek_collating_symbol_entry (br_elem->opr.name,
+						  sym_name_len);
+	      if (symb_table[2 * elem] != 0)
+		{
+		  /* We found the entry.  */
+		  idx = symb_table[2 * elem + 1];
+		  /* Skip the name of collating element name.  */
+		  idx += 1 + extra[idx];
+		  /* Skip the byte sequence of the collating element.  */
+		  idx += 1 + extra[idx];
+		  /* Adjust for the alignment.  */
+		  idx = (idx + 3) & ~3;
+		  /* Skip the multibyte collation sequence value.  */
+		  idx += sizeof (unsigned int);
+		  /* Skip the wide char sequence of the collating element.  */
+		  idx += sizeof (unsigned int) *
+		    (1 + *(unsigned int *) (extra + idx));
+		  /* Return the collation sequence value.  */
+		  return *(unsigned int *) (extra + idx);
+		}
+	      else if (symb_table[2 * elem] == 0 && sym_name_len == 1)
+		{
+		  /* No valid character.  Match it as a single byte
+		     character.  */
+		  return collseqmb[br_elem->opr.name[0]];
+		}
+	    }
+	  else if (sym_name_len == 1)
+	    return collseqmb[br_elem->opr.name[0]];
+	}
+      return UINT_MAX;
+    }
+
+  /* Local function for parse_bracket_exp used in _LIBC environment.
+     Build the range expression which starts from START_ELEM, and ends
+     at END_ELEM.  The results are written to MBCSET and SBCSET.
+     RANGE_ALLOC is the allocated size of mbcset->range_starts and
+     mbcset->range_ends; it is a pointer argument since we may
+     update it.
+
+     Returns REG_NOERROR on success; REG_ERANGE if either end point is
+     an equivalence class or a character class, or if the range is
+     empty while RE_NO_EMPTY_RANGES is set in the enclosing SYNTAX;
+     REG_ECOLLATE if either end point has no collation sequence value;
+     REG_ESPACE if the range arrays cannot be grown.  */
+
+  auto inline reg_errcode_t
+  __attribute ((always_inline))
+  build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem)
+	 re_charset_t *mbcset;
+	 int *range_alloc;
+	 bitset_t sbcset;
+	 bracket_elem_t *start_elem, *end_elem;
+    {
+      unsigned int ch;
+      uint32_t start_collseq;
+      uint32_t end_collseq;
+
+      /* Equivalence Classes and Character Classes can't be a range
+	 start/end.  */
+      if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
+	      || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,
+	      0))
+	return REG_ERANGE;
+
+      start_collseq = lookup_collation_sequence_value (start_elem);
+      end_collseq = lookup_collation_sequence_value (end_elem);
+      /* Check start/end collation sequence values.  */
+      if (BE (start_collseq == UINT_MAX || end_collseq == UINT_MAX, 0))
+	return REG_ECOLLATE;
+      if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0))
+	return REG_ERANGE;
+
+      /* Got valid collation sequence values, add them as a new entry.
+	 However, if we have no collation elements, and the character set
+	 is single byte, the single byte character set that we
+	 build below suffices. */
+      if (nrules > 0 || dfa->mb_cur_max > 1)
+	{
+	  /* Check the space of the arrays.  */
+	  if (BE (*range_alloc == mbcset->nranges, 0))
+	    {
+	      /* There is not enough space, need realloc.  */
+	      uint32_t *new_array_start;
+	      uint32_t *new_array_end;
+	      int new_nranges;
+
+	      /* +1 in case of mbcset->nranges is 0.  */
+	      new_nranges = 2 * mbcset->nranges + 1;
+
+	      /* Commit each array as soon as its realloc succeeds.  If
+		 both results were only checked together, a failure of
+		 one call after the other succeeded would leave MBCSET
+		 holding a pointer that realloc has already released or
+		 moved, and the caller's cleanup would free it again.  */
+	      new_array_start = re_realloc (mbcset->range_starts, uint32_t,
+					    new_nranges);
+	      if (BE (new_array_start == NULL, 0))
+		return REG_ESPACE;
+	      mbcset->range_starts = new_array_start;
+
+	      new_array_end = re_realloc (mbcset->range_ends, uint32_t,
+					  new_nranges);
+	      if (BE (new_array_end == NULL, 0))
+		return REG_ESPACE;
+	      mbcset->range_ends = new_array_end;
+
+	      /* Only advertise the larger capacity once both arrays
+		 really have it.  */
+	      *range_alloc = new_nranges;
+	    }
+
+	  mbcset->range_starts[mbcset->nranges] = start_collseq;
+	  mbcset->range_ends[mbcset->nranges++] = end_collseq;
+	}
+
+      /* Build the table for single byte characters.  */
+      for (ch = 0; ch < SBC_MAX; ch++)
+	{
+	  uint32_t ch_collseq;
+	  /*
+	  if (MB_CUR_MAX == 1)
+	  */
+	  if (nrules == 0)
+	    ch_collseq = collseqmb[ch];
+	  else
+	    ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
+	  /* Every byte whose collation value lies inside the closed
+	     interval belongs to the range.  */
+	  if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
+	    bitset_set (sbcset, ch);
+	}
+      return REG_NOERROR;
+    }
+
+  /* Local function for parse_bracket_exp used in _LIBC environment.
+     Add the collating element called NAME to the bracket expression
+     being built; the results are written to MBCSET and SBCSET.
+     COLL_SYM_ALLOC is the allocated size of mbcset->coll_syms; it is
+     a pointer argument since we may update it.
+
+     Returns REG_NOERROR on success, REG_ECOLLATE when NAME is neither
+     a known collating symbol nor a single byte, and REG_ESPACE when
+     mbcset->coll_syms cannot be grown.  */
+
+  auto inline reg_errcode_t
+  __attribute ((always_inline))
+  build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name)
+	 re_charset_t *mbcset;
+	 int *coll_sym_alloc;
+	 bitset_t sbcset;
+	 const unsigned char *name;
+    {
+      int32_t slot, offset;
+      size_t name_len = strlen ((const char *) name);
+
+      /* Without collation rules only a one-byte name is acceptable,
+	 and it is matched as that byte.  */
+      if (nrules == 0)
+	{
+	  if (BE (name_len != 1, 0))
+	    return REG_ECOLLATE;
+	  bitset_set (sbcset, name[0]);
+	  return REG_NOERROR;
+	}
+
+      slot = seek_collating_symbol_entry (name, name_len);
+      if (symb_table[2 * slot] == 0)
+	{
+	  /* No such collating symbol.  A one-byte name is still
+	     treated as a normal character; anything else is an
+	     error.  */
+	  if (name_len != 1)
+	    return REG_ECOLLATE;
+	  bitset_set (sbcset, name[0]);
+	  return REG_NOERROR;
+	}
+
+      /* We found the entry; step over the length-prefixed name of
+	 the collating element.  */
+      offset = symb_table[2 * slot + 1];
+      offset += 1 + extra[offset];
+
+      /* Make room for one more entry if the array is full.  */
+      if (BE (*coll_sym_alloc == mbcset->ncoll_syms, 0))
+	{
+	  /* +1 so that an empty array grows to one slot.  realloc is
+	     used since mbcset->coll_syms is NULL while the allocated
+	     size is 0.  */
+	  int grown = 2 * mbcset->ncoll_syms + 1;
+	  int32_t *resized = re_realloc (mbcset->coll_syms, int32_t, grown);
+	  if (BE (resized == NULL, 0))
+	    return REG_ESPACE;
+	  mbcset->coll_syms = resized;
+	  *coll_sym_alloc = grown;
+	}
+      mbcset->coll_syms[mbcset->ncoll_syms++] = offset;
+      return REG_NOERROR;
+    }
+#endif
+
+  re_token_t br_token;
+  re_bitset_ptr_t sbcset;
+#ifdef RE_ENABLE_I18N
+  re_charset_t *mbcset;
+  int coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0;
+  int equiv_class_alloc = 0, char_class_alloc = 0;
+#endif /* not RE_ENABLE_I18N */
+  int non_match = 0;
+  bin_tree_t *work_tree;
+  int token_len;
+  int first_round = 1;
+#ifdef _LIBC
+  collseqmb = (const unsigned char *)
+    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
+  nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+  if (nrules)
+    {
+      /*
+      if (MB_CUR_MAX > 1)
+      */
+      collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
+      table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB);
+      symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE,
+						  _NL_COLLATE_SYMB_TABLEMB);
+      extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
+						   _NL_COLLATE_SYMB_EXTRAMB);
+    }
+#endif
+  sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
+#ifdef RE_ENABLE_I18N
+  mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
+#endif /* RE_ENABLE_I18N */
+#ifdef RE_ENABLE_I18N
+  if (BE (sbcset == NULL || mbcset == NULL, 0))
+#else
+  if (BE (sbcset == NULL, 0))
+#endif /* RE_ENABLE_I18N */
+    {
+      *err = REG_ESPACE;
+      return NULL;
+    }
+
+  token_len = peek_token_bracket (token, regexp, syntax);
+  if (BE (token->type == END_OF_RE, 0))
+    {
+      *err = REG_BADPAT;
+      goto parse_bracket_exp_free_return;
+    }
+  if (token->type == OP_NON_MATCH_LIST)
+    {
+#ifdef RE_ENABLE_I18N
+      mbcset->non_match = 1;
+#endif /* not RE_ENABLE_I18N */
+      non_match = 1;
+      if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
+	bitset_set (sbcset, '\0');
+      re_string_skip_bytes (regexp, token_len); /* Skip a token.  */
+      token_len = peek_token_bracket (token, regexp, syntax);
+      if (BE (token->type == END_OF_RE, 0))
+	{
+	  *err = REG_BADPAT;
+	  goto parse_bracket_exp_free_return;
+	}
+    }
+
+  /* We treat the first ']' as a normal character.  */
+  if (token->type == OP_CLOSE_BRACKET)
+    token->type = CHARACTER;
+
+  while (1)
+    {
+      bracket_elem_t start_elem, end_elem;
+      unsigned char start_name_buf[BRACKET_NAME_BUF_SIZE];
+      unsigned char end_name_buf[BRACKET_NAME_BUF_SIZE];
+      reg_errcode_t ret;
+      int token_len2 = 0, is_range_exp = 0;
+      re_token_t token2;
+
+      start_elem.opr.name = start_name_buf;
+      ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa,
+				   syntax, first_round);
+      if (BE (ret != REG_NOERROR, 0))
+	{
+	  *err = ret;
+	  goto parse_bracket_exp_free_return;
+	}
+      first_round = 0;
+
+      /* Get information about the next token.  We need it in any case.  */
+      token_len = peek_token_bracket (token, regexp, syntax);
+
+      /* Do not check for ranges if we know they are not allowed.  */
+      if (start_elem.type != CHAR_CLASS && start_elem.type != EQUIV_CLASS)
+	{
+	  if (BE (token->type == END_OF_RE, 0))
+	    {
+	      *err = REG_EBRACK;
+	      goto parse_bracket_exp_free_return;
+	    }
+	  if (token->type == OP_CHARSET_RANGE)
+	    {
+	      re_string_skip_bytes (regexp, token_len); /* Skip '-'.  */
+	      token_len2 = peek_token_bracket (&token2, regexp, syntax);
+	      if (BE (token2.type == END_OF_RE, 0))
+		{
+		  *err = REG_EBRACK;
+		  goto parse_bracket_exp_free_return;
+		}
+	      if (token2.type == OP_CLOSE_BRACKET)
+		{
+		  /* We treat the last '-' as a normal character.  */
+		  re_string_skip_bytes (regexp, -token_len);
+		  token->type = CHARACTER;
+		}
+	      else
+		is_range_exp = 1;
+	    }
+	}
+
+      if (is_range_exp == 1)
+	{
+	  end_elem.opr.name = end_name_buf;
+	  ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2,
+				       dfa, syntax, 1);
+	  if (BE (ret != REG_NOERROR, 0))
+	    {
+	      *err = ret;
+	      goto parse_bracket_exp_free_return;
+	    }
+
+	  token_len = peek_token_bracket (token, regexp, syntax);
+
+#ifdef _LIBC
+	  *err = build_range_exp (sbcset, mbcset, &range_alloc,
+				  &start_elem, &end_elem);
+#else
+# ifdef RE_ENABLE_I18N
+	  *err = build_range_exp (sbcset,
+				  dfa->mb_cur_max > 1 ? mbcset : NULL,
+				  &range_alloc, &start_elem, &end_elem);
+# else
+	  *err = build_range_exp (sbcset, &start_elem, &end_elem);
+# endif
+#endif /* RE_ENABLE_I18N */
+	  if (BE (*err != REG_NOERROR, 0))
+	    goto parse_bracket_exp_free_return;
+	}
+      else
+	{
+	  switch (start_elem.type)
+	    {
+	    case SB_CHAR:
+	      bitset_set (sbcset, start_elem.opr.ch);
+	      break;
+#ifdef RE_ENABLE_I18N
+	    case MB_CHAR:
+	      /* Check whether the array has enough space.  */
+	      if (BE (mbchar_alloc == mbcset->nmbchars, 0))
+		{
+		  wchar_t *new_mbchars;
+		  /* Not enough, realloc it.  */
+		  /* +1 in case of mbcset->nmbchars is 0.  */
+		  mbchar_alloc = 2 * mbcset->nmbchars + 1;
+		  /* Use realloc since array is NULL if *alloc == 0.  */
+		  new_mbchars = re_realloc (mbcset->mbchars, wchar_t,
+					    mbchar_alloc);
+		  if (BE (new_mbchars == NULL, 0))
+		    goto parse_bracket_exp_espace;
+		  mbcset->mbchars = new_mbchars;
+		}
+	      mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch;
+	      break;
+#endif /* RE_ENABLE_I18N */
+	    case EQUIV_CLASS:
+	      *err = build_equiv_class (sbcset,
+#ifdef RE_ENABLE_I18N
+					mbcset, &equiv_class_alloc,
+#endif /* RE_ENABLE_I18N */
+					start_elem.opr.name);
+	      if (BE (*err != REG_NOERROR, 0))
+		goto parse_bracket_exp_free_return;
+	      break;
+	    case COLL_SYM:
+	      *err = build_collating_symbol (sbcset,
+#ifdef RE_ENABLE_I18N
+					     mbcset, &coll_sym_alloc,
+#endif /* RE_ENABLE_I18N */
+					     start_elem.opr.name);
+	      if (BE (*err != REG_NOERROR, 0))
+		goto parse_bracket_exp_free_return;
+	      break;
+	    case CHAR_CLASS:
+	      *err = build_charclass (regexp->trans, sbcset,
+#ifdef RE_ENABLE_I18N
+				      mbcset, &char_class_alloc,
+#endif /* RE_ENABLE_I18N */
+				      start_elem.opr.name, syntax);
+	      if (BE (*err != REG_NOERROR, 0))
+	       goto parse_bracket_exp_free_return;
+	      break;
+	    default:
+	      assert (0);
+	      break;
+	    }
+	}
+      if (BE (token->type == END_OF_RE, 0))
+	{
+	  *err = REG_EBRACK;
+	  goto parse_bracket_exp_free_return;
+	}
+      if (token->type == OP_CLOSE_BRACKET)
+	break;
+    }
+
+  re_string_skip_bytes (regexp, token_len); /* Skip a token.  */
+
+  /* If it is non-matching list.  */
+  if (non_match)
+    bitset_not (sbcset);
+
+#ifdef RE_ENABLE_I18N
+  /* Ensure only single byte characters are set.  */
+  if (dfa->mb_cur_max > 1)
+    bitset_mask (sbcset, dfa->sb_char);
+
+  if (mbcset->nmbchars || mbcset->ncoll_syms || mbcset->nequiv_classes
+      || mbcset->nranges || (dfa->mb_cur_max > 1 && (mbcset->nchar_classes
+						     || mbcset->non_match)))
+    {
+      bin_tree_t *mbc_tree;
+      int sbc_idx;
+      /* Build a tree for complex bracket.  */
+      dfa->has_mb_node = 1;
+      br_token.type = COMPLEX_BRACKET;
+      br_token.opr.mbcset = mbcset;
+      mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
+      if (BE (mbc_tree == NULL, 0))
+	goto parse_bracket_exp_espace;
+      for (sbc_idx = 0; sbc_idx < BITSET_WORDS; ++sbc_idx)
+	if (sbcset[sbc_idx])
+	  break;
+      /* If there are no bits set in sbcset, there is no point
+	 of having both SIMPLE_BRACKET and COMPLEX_BRACKET.  */
+      if (sbc_idx < BITSET_WORDS)
+	{
+          /* Build a tree for simple bracket.  */
+          br_token.type = SIMPLE_BRACKET;
+          br_token.opr.sbcset = sbcset;
+          work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
+          if (BE (work_tree == NULL, 0))
+            goto parse_bracket_exp_espace;
+
+          /* Then join them by ALT node.  */
+          work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT);
+          if (BE (work_tree == NULL, 0))
+            goto parse_bracket_exp_espace;
+	}
+      else
+	{
+	  re_free (sbcset);
+	  work_tree = mbc_tree;
+	}
+    }
+  else
+#endif /* not RE_ENABLE_I18N */
+    {
+#ifdef RE_ENABLE_I18N
+      free_charset (mbcset);
+#endif
+      /* Build a tree for simple bracket.  */
+      br_token.type = SIMPLE_BRACKET;
+      br_token.opr.sbcset = sbcset;
+      work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
+      if (BE (work_tree == NULL, 0))
+        goto parse_bracket_exp_espace;
+    }
+  return work_tree;
+
+ parse_bracket_exp_espace:
+  *err = REG_ESPACE;
+ parse_bracket_exp_free_return:
+  re_free (sbcset);
+#ifdef RE_ENABLE_I18N
+  free_charset (mbcset);
+#endif /* RE_ENABLE_I18N */
+  return NULL;
+}
+
+/* Parse one element of a bracket expression into ELEM.  TOKEN is the
+   already-peeked token at the current position of REGEXP and
+   TOKEN_LEN its length in bytes.  With ACCEPT_HYPHEN zero, a '-' is
+   only accepted when it is directly followed by the closing bracket.
+   Returns REG_NOERROR on success, REG_ERANGE for a misplaced '-', or
+   whatever parse_bracket_symbol reports for "[.", "[:" and "[=".  */
+
+static reg_errcode_t
+parse_bracket_element (bracket_elem_t *elem, re_string_t *regexp,
+		       re_token_t *token, int token_len, re_dfa_t *dfa,
+		       reg_syntax_t syntax, int accept_hyphen)
+{
+#ifdef RE_ENABLE_I18N
+  /* A multibyte character is consumed whole, whatever the token
+     says.  */
+  int mb_len = re_string_char_size_at (regexp, re_string_cur_idx (regexp));
+  if (mb_len > 1)
+    {
+      elem->type = MB_CHAR;
+      elem->opr.wch = re_string_wchar_at (regexp, re_string_cur_idx (regexp));
+      re_string_skip_bytes (regexp, mb_len);
+      return REG_NOERROR;
+    }
+#endif /* RE_ENABLE_I18N */
+
+  /* Consume the token we were handed.  */
+  re_string_skip_bytes (regexp, token_len);
+
+  switch (token->type)
+    {
+    case OP_OPEN_COLL_ELEM:
+    case OP_OPEN_CHAR_CLASS:
+    case OP_OPEN_EQUIV_CLASS:
+      return parse_bracket_symbol (elem, regexp, token);
+
+    case OP_CHARSET_RANGE:
+      if (!accept_hyphen)
+	{
+	  /* A '-' must only appear as anything but a range indicator
+	     before the closing bracket.  Everything else is an
+	     error.  */
+	  re_token_t lookahead;
+	  (void) peek_token_bracket (&lookahead, regexp, syntax);
+	  if (lookahead.type != OP_CLOSE_BRACKET)
+	    /* The actual error value is not standardized since this
+	       whole case is undefined.  But ERANGE makes good
+	       sense.  */
+	    return REG_ERANGE;
+	}
+      break;
+
+    default:
+      break;
+    }
+
+  /* Anything else is a plain single-byte character.  */
+  elem->type = SB_CHAR;
+  elem->opr.ch = token->opr.c;
+  return REG_NOERROR;
+}
+
+/* Parse a bracket symbol in the bracket expression.  Bracket symbols are
+   such as [:<character_class>:], [.<collating_element>.], and
+   [=<equivalent_class>=].  The name between the delimiters is copied,
+   NUL-terminated, into ELEM->opr.name, and ELEM->type is set from the
+   kind of opening TOKEN.  Returns REG_EBRACK if the input ends before
+   the closing delimiter or the name does not fit into
+   BRACKET_NAME_BUF_SIZE bytes, REG_NOERROR otherwise.  */
+
+static reg_errcode_t
+parse_bracket_symbol (bracket_elem_t *elem, re_string_t *regexp,
+		      re_token_t *token)
+{
+  const unsigned char closer = token->opr.c;
+  int len;
+
+  if (re_string_eoi(regexp))
+    return REG_EBRACK;
+
+  for (len = 0; len < BRACKET_NAME_BUF_SIZE; ++len)
+    {
+      unsigned char byte;
+
+      /* Character class names are fetched through the case-aware
+	 reader; the other kinds are read as raw bytes.  */
+      if (token->type == OP_OPEN_CHAR_CLASS)
+	byte = re_string_fetch_byte_case (regexp);
+      else
+	byte = re_string_fetch_byte (regexp);
+
+      if (re_string_eoi(regexp))
+	return REG_EBRACK;
+
+      /* The symbol ends at the delimiter immediately followed by
+	 ']'.  */
+      if (byte == closer && re_string_peek_byte (regexp, 0) == ']')
+	break;
+
+      elem->opr.name[len] = byte;
+    }
+
+  /* Running off the end of the loop means the buffer filled up
+     before the terminator showed up.  */
+  if (len >= BRACKET_NAME_BUF_SIZE)
+    return REG_EBRACK;
+
+  re_string_skip_bytes (regexp, 1);	/* Skip the ']'.  */
+  elem->opr.name[len] = '\0';
+
+  if (token->type == OP_OPEN_COLL_ELEM)
+    elem->type = COLL_SYM;
+  else if (token->type == OP_OPEN_EQUIV_CLASS)
+    elem->type = EQUIV_CLASS;
+  else if (token->type == OP_OPEN_CHAR_CLASS)
+    elem->type = CHAR_CLASS;
+
+  return REG_NOERROR;
+}
+
+  /* Helper function for parse_bracket_exp.
+     Build the equivalence class which is represented by NAME.
+     The results are written to MBCSET and SBCSET.
+     EQUIV_CLASS_ALLOC is the allocated size of mbcset->equiv_classes;
+     it is a pointer argument since we may update it.
+
+     Returns REG_NOERROR on success, REG_ECOLLATE if NAME is not a
+     valid character (or, without collation rules, is not exactly one
+     byte long), and REG_ESPACE if mbcset->equiv_classes cannot be
+     grown.  */
+
+static reg_errcode_t
+#ifdef RE_ENABLE_I18N
+build_equiv_class (bitset_t sbcset, re_charset_t *mbcset,
+		   int *equiv_class_alloc, const unsigned char *name)
+#else /* not RE_ENABLE_I18N */
+build_equiv_class (bitset_t sbcset, const unsigned char *name)
+#endif /* not RE_ENABLE_I18N */
+{
+#ifdef _LIBC
+  uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+  if (nrules != 0)
+    {
+      const int32_t *table, *indirect;
+      const unsigned char *weights, *extra, *cp;
+      unsigned char char_buf[2];
+      int32_t idx1, idx2;
+      unsigned int ch;
+      size_t len;
+      /* This #include defines a local function!  */
+# include <locale/weight.h>
+      /* Calculate the index for equivalence class.  */
+      cp = name;
+      table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
+      weights = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
+					       _NL_COLLATE_WEIGHTMB);
+      extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
+						   _NL_COLLATE_EXTRAMB);
+      indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
+						_NL_COLLATE_INDIRECTMB);
+      idx1 = findidx (&cp);
+      /* Reject NAME when the lookup yields index 0 or when CP was not
+	 advanced all the way to the end of NAME.  */
+      if (BE (idx1 == 0 || cp < name + strlen ((const char *) name), 0))
+	/* This isn't a valid character.  */
+	return REG_ECOLLATE;
+
+      /* Build single byte matching table for this equivalence class.  */
+      char_buf[1] = (unsigned char) '\0';
+      len = weights[idx1];
+      for (ch = 0; ch < SBC_MAX; ++ch)
+	{
+	  char_buf[0] = ch;
+	  cp = char_buf;
+	  idx2 = findidx (&cp);
+/*
+	  idx2 = table[ch];
+*/
+	  if (idx2 == 0)
+	    /* This isn't a valid character.  */
+	    continue;
+	  /* Only entries whose leading length byte agrees can be
+	     equivalent.  */
+	  if (len == weights[idx2])
+	    {
+	      int cnt = 0;
+	      /* Compare the bytes that follow the length byte; the loop
+		 runs CNT from 0 through LEN inclusive.  */
+	      while (cnt <= len &&
+		     weights[idx1 + 1 + cnt] == weights[idx2 + 1 + cnt])
+		++cnt;
+
+	      /* CNT only exceeds LEN when every compared byte matched.  */
+	      if (cnt > len)
+		bitset_set (sbcset, ch);
+	    }
+	}
+      /* Check whether the array has enough space.  */
+      if (BE (*equiv_class_alloc == mbcset->nequiv_classes, 0))
+	{
+	  /* Not enough, realloc it.  */
+	  /* +1 in case of mbcset->nequiv_classes is 0.  */
+	  int new_equiv_class_alloc = 2 * mbcset->nequiv_classes + 1;
+	  /* Use realloc since the array is NULL if *alloc == 0.  */
+	  int32_t *new_equiv_classes = re_realloc (mbcset->equiv_classes,
+						   int32_t,
+						   new_equiv_class_alloc);
+	  if (BE (new_equiv_classes == NULL, 0))
+	    return REG_ESPACE;
+	  mbcset->equiv_classes = new_equiv_classes;
+	  *equiv_class_alloc = new_equiv_class_alloc;
+	}
+      /* Remember the class by the index computed for NAME.  */
+      mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1;
+    }
+  else
+#endif /* _LIBC */
+    {
+      /* No collation rules available: an equivalence class can only
+	 name a single byte, which then matches just itself.  */
+      if (BE (strlen ((const char *) name) != 1, 0))
+	return REG_ECOLLATE;
+      bitset_set (sbcset, *name);
+    }
+  return REG_NOERROR;
+}
+
+  /* Helper function for parse_bracket_exp.
+     Build the character class which is represented by CLASS_NAME.
+     The results are written to MBCSET and SBCSET.
+     CHAR_CLASS_ALLOC is the allocated size of mbcset->char_classes;
+     it is a pointer argument since we may update it.
+     When TRANS is non-NULL, the bit set for each matching byte is the
+     translated byte TRANS[i] rather than i itself.
+
+     Returns REG_NOERROR on success, REG_ECTYPE if the class name is
+     not one of the twelve names handled below, and REG_ESPACE if
+     mbcset->char_classes cannot be grown.  */
+
+static reg_errcode_t
+#ifdef RE_ENABLE_I18N
+build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
+		 re_charset_t *mbcset, int *char_class_alloc,
+		 const unsigned char *class_name, reg_syntax_t syntax)
+#else /* not RE_ENABLE_I18N */
+build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
+		 const unsigned char *class_name, reg_syntax_t syntax)
+#endif /* not RE_ENABLE_I18N */
+{
+  int i;
+  const char *name = (const char *) class_name;
+
+  /* In case of REG_ICASE "upper" and "lower" match the both of
+     upper and lower cases.  */
+  if ((syntax & RE_ICASE)
+      && (strcmp (name, "upper") == 0 || strcmp (name, "lower") == 0))
+    name = "alpha";
+
+#ifdef RE_ENABLE_I18N
+  /* Check the space of the arrays.  */
+  if (BE (*char_class_alloc == mbcset->nchar_classes, 0))
+    {
+      /* Not enough, realloc it.  */
+      /* +1 in case of mbcset->nchar_classes is 0.  */
+      int new_char_class_alloc = 2 * mbcset->nchar_classes + 1;
+      /* Use realloc since array is NULL if *alloc == 0.  */
+      wctype_t *new_char_classes = re_realloc (mbcset->char_classes, wctype_t,
+					       new_char_class_alloc);
+      if (BE (new_char_classes == NULL, 0))
+	return REG_ESPACE;
+      mbcset->char_classes = new_char_classes;
+      *char_class_alloc = new_char_class_alloc;
+    }
+  /* The class descriptor is recorded before NAME is validated by the
+     strcmp chain below; an unknown name still ends in REG_ECTYPE.  */
+  mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name);
+#endif /* RE_ENABLE_I18N */
+
+/* Set in SBCSET the bit of every byte below SBC_MAX for which
+   CTYPE_FUNC is true, mapping the byte through TRANS when a
+   translation table is in use.  */
+#define BUILD_CHARCLASS_LOOP(ctype_func)	\
+  do {						\
+    if (BE (trans != NULL, 0))			\
+      {						\
+	for (i = 0; i < SBC_MAX; ++i)		\
+  	  if (ctype_func (i))			\
+	    bitset_set (sbcset, trans[i]);	\
+      }						\
+    else					\
+      {						\
+	for (i = 0; i < SBC_MAX; ++i)		\
+  	  if (ctype_func (i))			\
+	    bitset_set (sbcset, i);		\
+      }						\
+  } while (0)
+
+  /* Dispatch on the class name to the matching <ctype.h> predicate.  */
+  if (strcmp (name, "alnum") == 0)
+    BUILD_CHARCLASS_LOOP (isalnum);
+  else if (strcmp (name, "cntrl") == 0)
+    BUILD_CHARCLASS_LOOP (iscntrl);
+  else if (strcmp (name, "lower") == 0)
+    BUILD_CHARCLASS_LOOP (islower);
+  else if (strcmp (name, "space") == 0)
+    BUILD_CHARCLASS_LOOP (isspace);
+  else if (strcmp (name, "alpha") == 0)
+    BUILD_CHARCLASS_LOOP (isalpha);
+  else if (strcmp (name, "digit") == 0)
+    BUILD_CHARCLASS_LOOP (isdigit);
+  else if (strcmp (name, "print") == 0)
+    BUILD_CHARCLASS_LOOP (isprint);
+  else if (strcmp (name, "upper") == 0)
+    BUILD_CHARCLASS_LOOP (isupper);
+  else if (strcmp (name, "blank") == 0)
+    BUILD_CHARCLASS_LOOP (isblank);
+  else if (strcmp (name, "graph") == 0)
+    BUILD_CHARCLASS_LOOP (isgraph);
+  else if (strcmp (name, "punct") == 0)
+    BUILD_CHARCLASS_LOOP (ispunct);
+  else if (strcmp (name, "xdigit") == 0)
+    BUILD_CHARCLASS_LOOP (isxdigit);
+  else
+    return REG_ECTYPE;
+
+  return REG_NOERROR;
+}
+
+/* Build the parse tree for a bracket-like operator that stands for the
+   character class CLASS_NAME plus every byte of the NUL-terminated
+   string EXTRA.  If NON_MATCH is nonzero the single-byte set is
+   inverted and the multibyte set is flagged as non-matching.  TRANS is
+   handed on to build_charclass.
+
+   The result is a SIMPLE_BRACKET node; when DFA->mb_cur_max > 1 it is
+   joined by an OP_ALT node with a COMPLEX_BRACKET node.  On failure
+   NULL is returned, *ERR is set (REG_ESPACE, or the error reported by
+   build_charclass) and the sets allocated here are released.  */
+
+static bin_tree_t *
+build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
+		    const unsigned char *class_name,
+		    const unsigned char *extra, int non_match,
+		    reg_errcode_t *err)
+{
+  re_bitset_ptr_t sbcset;
+#ifdef RE_ENABLE_I18N
+  re_charset_t *mbcset;
+  int alloc = 0;
+#endif /* not RE_ENABLE_I18N */
+  reg_errcode_t ret;
+  re_token_t br_token;
+  bin_tree_t *tree;
+
+  sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
+  if (BE (sbcset == NULL, 0))
+    {
+      *err = REG_ESPACE;
+      return NULL;
+    }
+
+#ifdef RE_ENABLE_I18N
+  mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
+  if (BE (mbcset == NULL, 0))
+    {
+      /* Allocate and check one set at a time so that the set we
+	 already own is not leaked when the second allocation fails.  */
+      re_free (sbcset);
+      *err = REG_ESPACE;
+      return NULL;
+    }
+
+  if (non_match)
+    mbcset->non_match = 1;
+#endif /* RE_ENABLE_I18N */
+
+  /* We don't care the syntax in this case.  */
+  ret = build_charclass (trans, sbcset,
+#ifdef RE_ENABLE_I18N
+			 mbcset, &alloc,
+#endif /* RE_ENABLE_I18N */
+			 class_name, 0);
+
+  if (BE (ret != REG_NOERROR, 0))
+    {
+      re_free (sbcset);
+#ifdef RE_ENABLE_I18N
+      free_charset (mbcset);
+#endif /* RE_ENABLE_I18N */
+      *err = ret;
+      return NULL;
+    }
+  /* \w match '_' also.  */
+  for (; *extra; extra++)
+    bitset_set (sbcset, *extra);
+
+  /* If it is non-matching list.  */
+  if (non_match)
+    bitset_not (sbcset);
+
+#ifdef RE_ENABLE_I18N
+  /* Ensure only single byte characters are set.  */
+  if (dfa->mb_cur_max > 1)
+    bitset_mask (sbcset, dfa->sb_char);
+#endif
+
+  /* Build a tree for simple bracket.  */
+  br_token.type = SIMPLE_BRACKET;
+  br_token.opr.sbcset = sbcset;
+  tree = create_token_tree (dfa, NULL, NULL, &br_token);
+  if (BE (tree == NULL, 0))
+    goto build_word_op_espace;
+
+#ifdef RE_ENABLE_I18N
+  if (dfa->mb_cur_max > 1)
+    {
+      bin_tree_t *mbc_tree;
+      /* Build a tree for complex bracket.  */
+      br_token.type = COMPLEX_BRACKET;
+      br_token.opr.mbcset = mbcset;
+      dfa->has_mb_node = 1;
+      mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
+      if (BE (mbc_tree == NULL, 0))
+	goto build_word_op_espace;
+      /* Then join them by ALT node.  */
+      tree = create_tree (dfa, tree, mbc_tree, OP_ALT);
+      /* Test the node that was just created.  MBC_TREE is known to be
+	 non-NULL here, so testing it would hide an allocation failure
+	 and return NULL without setting *ERR.  */
+      if (BE (tree != NULL, 1))
+	return tree;
+      /* Otherwise fall through to the out-of-memory cleanup.  */
+    }
+  else
+    {
+      /* Single-byte locale: the multibyte set is not needed.  */
+      free_charset (mbcset);
+      return tree;
+    }
+#else /* not RE_ENABLE_I18N */
+  return tree;
+#endif /* not RE_ENABLE_I18N */
+
+ build_word_op_espace:
+  re_free (sbcset);
+#ifdef RE_ENABLE_I18N
+  free_charset (mbcset);
+#endif /* RE_ENABLE_I18N */
+  *err = REG_ESPACE;
+  return NULL;
+}
+
+/* This is intended for the expressions like "a{1,3}".
+   Fetch a number from `input', one token at a time, stopping at a ','
+   or at an OP_CLOSE_DUP_NUM token (which is left in *TOKEN).
+   Return the number.
+   Return -1 if the number field is empty, like "{,1}".
+   Return -2 if an error occurred: the pattern ended, a token was not
+   a decimal digit, or the value exceeded RE_DUP_MAX.  */
+
+static int
+fetch_number (re_string_t *input, re_token_t *token, reg_syntax_t syntax)
+{
+  int value = -1;		/* -1: no digit seen yet; -2: error.  */
+
+  for (;;)
+    {
+      unsigned char c;
+
+      fetch_token (token, input, syntax);
+      c = token->opr.c;
+
+      if (BE (token->type == END_OF_RE, 0))
+	return -2;
+      if (token->type == OP_CLOSE_DUP_NUM || c == ',')
+	return value;
+
+      /* An error is sticky: once seen, keep consuming tokens up to
+	 the terminator but never recover.  */
+      if (value == -2 || token->type != CHARACTER || c < '0' || '9' < c)
+	value = -2;
+      else if (value == -1)
+	value = c - '0';
+      else
+	value = value * 10 + c - '0';
+
+      if (value > RE_DUP_MAX)
+	value = -2;
+    }
+}
+
+#ifdef RE_ENABLE_I18N
+/* Release CSET together with every array it owns.  */
+static void
+free_charset (re_charset_t *cset)
+{
+  /* The member arrays are independent of one another; only CSET
+     itself has to go last.  */
+  re_free (cset->char_classes);
+# ifdef _LIBC
+  re_free (cset->range_ends);
+  re_free (cset->range_starts);
+  re_free (cset->equiv_classes);
+  re_free (cset->coll_syms);
+# endif
+  re_free (cset->mbchars);
+  re_free (cset);
+}
+#endif /* RE_ENABLE_I18N */
+
+/* Functions for binary tree operation.  */
+
+/* Create a tree node of kind TYPE with children LEFT and RIGHT, using
+   DFA's tree storage.  Returns NULL if the node cannot be allocated.  */
+
+static bin_tree_t *
+create_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
+	     re_token_type_t type)
+{
+  re_token_t t;
+  /* create_token_tree copies the whole token into the new node, so
+     clear it first rather than copying indeterminate bytes for every
+     field other than the type.  */
+  memset (&t, 0, sizeof t);
+  t.type = type;
+  return create_token_tree (dfa, left, right, &t);
+}
+
+/* Take the next free node from DFA's tree storage, starting a fresh
+   storage block when the current one is full, and initialise it with
+   a copy of TOKEN and the children LEFT and RIGHT (whose parent links
+   are updated).  Returns NULL if a new storage block cannot be
+   allocated.  */
+static bin_tree_t *
+create_token_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
+		   const re_token_t *token)
+{
+  bin_tree_t *node;
+
+  if (BE (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE, 0))
+    {
+      /* Current block exhausted: push a new one on the front of the
+	 storage list.  */
+      bin_tree_storage_t *block = re_malloc (bin_tree_storage_t, 1);
+
+      if (block == NULL)
+	return NULL;
+      block->next = dfa->str_tree_storage;
+      dfa->str_tree_storage = block;
+      dfa->str_tree_storage_idx = 0;
+    }
+
+  node = &dfa->str_tree_storage->data[dfa->str_tree_storage_idx];
+  dfa->str_tree_storage_idx++;
+
+  /* Copy the token first, then reset the per-node flags in the
+     copy.  */
+  node->token = *token;
+  node->token.duplicated = 0;
+  node->token.opt_subexp = 0;
+
+  node->parent = NULL;
+  node->first = NULL;
+  node->next = NULL;
+  node->node_idx = -1;
+
+  node->left = left;
+  if (left != NULL)
+    left->parent = node;
+
+  node->right = right;
+  if (right != NULL)
+    right->parent = node;
+
+  return node;
+}
+
+/* Tree-walk callback: flag NODE as an optional subexpression when it
+   is the SUBEXP node whose index equals the integer carried in EXTRA.
+   To be called from preorder or postorder.  Always returns
+   REG_NOERROR.  */
+
+static reg_errcode_t
+mark_opt_subexp (void *extra, bin_tree_t *node)
+{
+  /* EXTRA carries the subexpression index by value, not an
+     address.  */
+  const int wanted = (int) (long) extra;
+
+  if (node->token.type != SUBEXP || node->token.opr.idx != wanted)
+    return REG_NOERROR;
+
+  node->token.opt_subexp = 1;
+  return REG_NOERROR;
+}
+
+/* Free the allocated memory inside NODE: the character set of a
+   COMPLEX_BRACKET token or the bit set of a SIMPLE_BRACKET token.
+   Tokens marked as duplicated are left alone.  */
+
+static void
+free_token (re_token_t *node)
+{
+  if (node->duplicated != 0)
+    return;
+
+#ifdef RE_ENABLE_I18N
+  if (node->type == COMPLEX_BRACKET)
+    {
+      free_charset (node->opr.mbcset);
+      return;
+    }
+#endif /* RE_ENABLE_I18N */
+
+  if (node->type == SIMPLE_BRACKET)
+    re_free (node->opr.sbcset);
+}
+
+/* Worker function for tree walking.  Free the allocated memory held
+   by NODE's own token; EXTRA is not used.  Always returns
+   REG_NOERROR.  */
+
+static reg_errcode_t
+free_tree (void *extra, bin_tree_t *node)
+{
+  re_token_t *held = &node->token;
+
+  free_token (held);
+  return REG_NOERROR;
+}
+
+
+/* Duplicate the tree rooted at ROOT, allocating the copies from DFA's
+   tree storage, and return the new root (NULL if a node cannot be
+   allocated).  Every copied token is marked as duplicated.  This is a
+   preorder visit similar to the one implemented by the generic
+   visitor, but we need more infrastructure to maintain two parallel
+   trees --- so, it's easier to duplicate.  */
+
+static bin_tree_t *
+duplicate_tree (const bin_tree_t *root, re_dfa_t *dfa)
+{
+  const bin_tree_t *node;
+  bin_tree_t *dup_root;
+  /* P_NEW is the link in the copy that receives the next new node;
+     DUP_NODE is the node that will become its parent.  The copy of
+     ROOT therefore gets ROOT's own parent as its parent.  */
+  bin_tree_t **p_new = &dup_root, *dup_node = root->parent;
+
+  for (node = root; ; )
+    {
+      /* Create a new tree and link it back to the current parent.  */
+      *p_new = create_token_tree (dfa, NULL, NULL, &node->token);
+      if (*p_new == NULL)
+	return NULL;
+      (*p_new)->parent = dup_node;
+      (*p_new)->token.duplicated = 1;
+      dup_node = *p_new;
+
+      /* Go to the left node, or up and to the right.  */
+      if (node->left)
+	{
+	  node = node->left;
+	  p_new = &dup_node->left;
+	}
+      else
+	{
+	  const bin_tree_t *prev = NULL;
+	  /* Climb, in both trees in step, while the current node has no
+	     right child or we have just come back from it.  */
+	  while (node->right == prev || node->right == NULL)
+	    {
+	      prev = node;
+	      node = node->parent;
+	      dup_node = dup_node->parent;
+	      /* NOTE(review): the walk only ends once it climbs past a
+		 node whose parent is NULL, so this looks like it expects
+		 ROOT->parent to be NULL --- confirm against callers.  */
+	      if (!node)
+	        return dup_root;
+	    }
+	  node = node->right;
+	  p_new = &dup_node->right;
+	}
+    }
+}
+
+/******************************************************************************/
+/******************************************************************************/
+/******************************************************************************/
+/* GKINCLUDE #include "regexec.c" */
+/******************************************************************************/
+/******************************************************************************/
+/******************************************************************************/
+static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
+				     int n) internal_function;
+/* Forward declarations of the file-local (static) matcher helpers.
+   Every prototype below is tagged with the internal_function macro.  */
+static void match_ctx_clean (re_match_context_t *mctx) internal_function;
+static void match_ctx_free (re_match_context_t *cache) internal_function;
+static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
+					  int str_idx, int from, int to)
+     internal_function;
+static int search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx)
+     internal_function;
+static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node,
+					   int str_idx) internal_function;
+static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
+						   int node, int str_idx)
+     internal_function;
+static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
+			   re_dfastate_t **limited_sts, int last_node,
+			   int last_str_idx)
+     internal_function;
+/* Search drivers shared by the POSIX (regexec) and GNU (re_search*,
+   re_match*) entry points defined later in this file.  */
+static reg_errcode_t re_search_internal (const regex_t *preg,
+					 const char *string, int length,
+					 int start, int range, int stop,
+					 size_t nmatch, regmatch_t pmatch[],
+					 int eflags) internal_function;
+static int re_search_2_stub (struct re_pattern_buffer *bufp,
+			     const char *string1, int length1,
+			     const char *string2, int length2,
+			     int start, int range, struct re_registers *regs,
+			     int stop, int ret_len) internal_function;
+static int re_search_stub (struct re_pattern_buffer *bufp,
+			   const char *string, int length, int start,
+			   int range, int stop, struct re_registers *regs,
+			   int ret_len) internal_function;
+static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
+			      int nregs, int regs_allocated) internal_function;
+static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
+     internal_function;
+static int check_matching (re_match_context_t *mctx, int fl_longest_match,
+			   int *p_match_first) internal_function;
+static int check_halt_state_context (const re_match_context_t *mctx,
+				     const re_dfastate_t *state, int idx)
+     internal_function;
+static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
+			 regmatch_t *prev_idx_match, int cur_node,
+			 int cur_idx, int nmatch) internal_function;
+static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
+				      int str_idx, int dest_node, int nregs,
+				      regmatch_t *regs,
+				      re_node_set *eps_via_nodes)
+     internal_function;
+static reg_errcode_t set_regs (const regex_t *preg,
+			       const re_match_context_t *mctx,
+			       size_t nmatch, regmatch_t *pmatch,
+			       int fl_backtrack) internal_function;
+static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs)
+     internal_function;
+
+/* Declared only when RE_ENABLE_I18N is defined.  */
+#ifdef RE_ENABLE_I18N
+static int sift_states_iter_mb (const re_match_context_t *mctx,
+				re_sift_context_t *sctx,
+				int node_idx, int str_idx, int max_str_idx)
+     internal_function;
+#endif /* RE_ENABLE_I18N */
+static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
+					   re_sift_context_t *sctx)
+     internal_function;
+static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
+					  re_sift_context_t *sctx, int str_idx,
+					  re_node_set *cur_dest)
+     internal_function;
+static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
+					      re_sift_context_t *sctx,
+					      int str_idx,
+					      re_node_set *dest_nodes)
+     internal_function;
+static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
+					    re_node_set *dest_nodes,
+					    const re_node_set *candidates)
+     internal_function;
+static int check_dst_limits (const re_match_context_t *mctx,
+			     re_node_set *limits,
+			     int dst_node, int dst_idx, int src_node,
+			     int src_idx) internal_function;
+static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
+					int boundaries, int subexp_idx,
+					int from_node, int bkref_idx)
+     internal_function;
+static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
+				      int limit, int subexp_idx,
+				      int node, int str_idx,
+				      int bkref_idx) internal_function;
+static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
+					  re_node_set *dest_nodes,
+					  const re_node_set *candidates,
+					  re_node_set *limits,
+					  struct re_backref_cache_entry *bkref_ents,
+					  int str_idx) internal_function;
+static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
+					re_sift_context_t *sctx,
+					int str_idx, const re_node_set *candidates)
+     internal_function;
+static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
+					re_dfastate_t **dst,
+					re_dfastate_t **src, int num)
+     internal_function;
+static re_dfastate_t *find_recover_state (reg_errcode_t *err,
+					 re_match_context_t *mctx) internal_function;
+static re_dfastate_t *transit_state (reg_errcode_t *err,
+				     re_match_context_t *mctx,
+				     re_dfastate_t *state) internal_function;
+static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
+					    re_match_context_t *mctx,
+					    re_dfastate_t *next_state)
+     internal_function;
+static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
+						re_node_set *cur_nodes,
+						int str_idx) internal_function;
+/* Compiled out: the transit_state_sb prototype is disabled with #if 0.  */
+#if 0
+static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
+					re_match_context_t *mctx,
+					re_dfastate_t *pstate)
+     internal_function;
+#endif
+/* Declared only when RE_ENABLE_I18N is defined.  */
+#ifdef RE_ENABLE_I18N
+static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
+				       re_dfastate_t *pstate)
+     internal_function;
+#endif /* RE_ENABLE_I18N */
+static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
+					  const re_node_set *nodes)
+     internal_function;
+static reg_errcode_t get_subexp (re_match_context_t *mctx,
+				 int bkref_node, int bkref_str_idx)
+     internal_function;
+static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
+				     const re_sub_match_top_t *sub_top,
+				     re_sub_match_last_t *sub_last,
+				     int bkref_node, int bkref_str)
+     internal_function;
+static int find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
+			     int subexp_idx, int type) internal_function;
+static reg_errcode_t check_arrival (re_match_context_t *mctx,
+				    state_array_t *path, int top_node,
+				    int top_str, int last_node, int last_str,
+				    int type) internal_function;
+static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
+						   int str_idx,
+						   re_node_set *cur_nodes,
+						   re_node_set *next_nodes)
+     internal_function;
+static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
+					       re_node_set *cur_nodes,
+					       int ex_subexp, int type)
+     internal_function;
+static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
+						   re_node_set *dst_nodes,
+						   int target, int ex_subexp,
+						   int type) internal_function;
+static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
+					 re_node_set *cur_nodes, int cur_str,
+					 int subexp_num, int type)
+     internal_function;
+static int build_trtable (const re_dfa_t *dfa,
+			  re_dfastate_t *state) internal_function;
+/* Declared only when RE_ENABLE_I18N is defined; the collation helper
+   additionally requires _LIBC.  */
+#ifdef RE_ENABLE_I18N
+static int check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
+				    const re_string_t *input, int idx)
+     internal_function;
+# ifdef _LIBC
+static unsigned int find_collation_sequence_value (const unsigned char *mbs,
+						   size_t name_len)
+     internal_function;
+# endif /* _LIBC */
+#endif /* RE_ENABLE_I18N */
+static int group_nodes_into_DFAstates (const re_dfa_t *dfa,
+				       const re_dfastate_t *state,
+				       re_node_set *states_node,
+				       bitset_t *states_ch) internal_function;
+static int check_node_accept (const re_match_context_t *mctx,
+			      const re_token_t *node, int idx)
+     internal_function;
+static reg_errcode_t extend_buffers (re_match_context_t *mctx)
+     internal_function;
+
+/* Entry point for POSIX code.  */
+
+/* regexec searches for a given pattern, specified by PREG, in the
+   string STRING.
+
+   If NMATCH is zero or REG_NOSUB was set in the cflags argument to
+   `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
+   least NMATCH elements, and we set them to the offsets of the
+   corresponding matched substrings.
+
+   EFLAGS specifies `execution flags' which affect matching: if
+   REG_NOTBOL is set, then ^ does not match at the beginning of the
+   string; if REG_NOTEOL is set, then $ does not match at the end.
+
+   We return 0 if we find a match and REG_NOMATCH if not.  */
+
+/* POSIX entry point, written as a prototype-style definition (the old
+   identifier-list form is obsolescent C and gives the compiler no chance
+   to check the definition against its declaration).
+
+   PREG    compiled pattern.
+   STRING  subject text; NUL-terminated unless REG_STARTEND is given.
+   NMATCH  number of elements available in PMATCH.
+   PMATCH  receives the match offsets; with REG_STARTEND, PMATCH[0] is also
+	   read on entry to obtain the [rm_so, rm_eo) window to search.
+   EFLAGS  any combination of REG_NOTBOL, REG_NOTEOL and REG_STARTEND.
+
+   Returns REG_BADPAT for unknown EFLAGS bits, 0 on a match, and 1 for
+   every other outcome of the internal search (no match or error).  */
+int
+regexec (const regex_t *__restrict preg, const char *__restrict string,
+	 size_t nmatch, regmatch_t pmatch[], int eflags)
+{
+  reg_errcode_t err;
+  int start, length;
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+
+  /* Reject execution flags this implementation does not know about.  */
+  if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
+    return REG_BADPAT;
+
+  if (eflags & REG_STARTEND)
+    {
+      /* The caller supplies the search window through PMATCH[0].  */
+      start = pmatch[0].rm_so;
+      length = pmatch[0].rm_eo;
+    }
+  else
+    {
+      start = 0;
+      length = strlen (string);
+    }
+
+  __libc_lock_lock (dfa->lock);
+  /* A pattern compiled without sub-expression reporting never fills
+     PMATCH, so search with zero registers in that case.  */
+  if (preg->no_sub)
+    err = re_search_internal (preg, string, length, start, length - start,
+			      length, 0, NULL, eflags);
+  else
+    err = re_search_internal (preg, string, length, start, length - start,
+			      length, nmatch, pmatch, eflags);
+  __libc_lock_unlock (dfa->lock);
+  /* Collapse every failure code into a plain non-zero result.  */
+  return err != REG_NOERROR;
+}
+
+/* Symbol versioning, compiled only when _LIBC is defined: regexec is bound
+   to the GLIBC_2_3_4 version, and when SHLIB_COMPAT asks for it a second
+   entry point is bound to the GLIBC_2_0 version.  */
+#ifdef _LIBC
+# include <shlib-compat.h>
+versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
+
+# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
+__typeof__ (__regexec) __compat_regexec;
+
+/* Compatibility wrapper: forwards to regexec after masking EFLAGS down to
+   REG_NOTBOL | REG_NOTEOL, so any other bit (including REG_STARTEND) is
+   silently dropped instead of being acted upon or rejected.  */
+int
+attribute_compat_text_section
+__compat_regexec (const regex_t *__restrict preg,
+		  const char *__restrict string, size_t nmatch,
+		  regmatch_t pmatch[], int eflags)
+{
+  return regexec (preg, string, nmatch, pmatch,
+		  eflags & (REG_NOTBOL | REG_NOTEOL));
+}
+compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
+# endif
+#endif
+
+/* Entry points for GNU code.  */
+
+/* re_match, re_search, re_match_2, re_search_2
+
+   The former two functions operate on STRING with length LENGTH,
+   while the latter two operate on the concatenation of STRING1 and STRING2
+   with lengths LENGTH1 and LENGTH2, respectively.
+
+   re_match() matches the compiled pattern in BUFP against the string,
+   starting at index START.
+
+   re_search() first tries matching at index START, then it tries to match
+   starting from index START + 1, and so on.  The last start position tried
+   is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
+   way as re_match().)
+
+   The parameter STOP of re_{match,search}_2 specifies that no match
+   extending beyond the first STOP characters of the concatenation of the
+   strings is considered.
+
+   If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
+   and of all groups are stored in REGS.  (For the "_2" variants, the offsets
+   are computed relative to the concatenation, not relative to the individual
+   strings.)
+
+   On success, re_match* functions return the length of the match, re_search*
+   return the position of the start of the match.  Return value -1 means no
+   match was found and -2 indicates an internal error.  */
+
+/* Match the pattern in BUFP against STRING (LENGTH bytes) at index START
+   only: the search range is 0 and the stop index is LENGTH.  The result is
+   whatever re_search_stub yields with RET_LEN set, i.e. the match length
+   on success.  Written as a prototype-style definition instead of the
+   obsolescent identifier-list form.  */
+int
+re_match (struct re_pattern_buffer *bufp, const char *string, int length,
+	  int start, struct re_registers *regs)
+{
+  return re_search_stub (bufp, string, length, start, 0, length, regs, 1);
+}
+#ifdef _LIBC
+weak_alias (__re_match, re_match)
+#endif
+
+/* Search STRING (LENGTH bytes) for the pattern in BUFP, trying start
+   positions from START through START + RANGE, with the stop index set to
+   LENGTH.  The result is whatever re_search_stub yields with RET_LEN
+   clear, i.e. the position of the match on success.  Written as a
+   prototype-style definition instead of the obsolescent identifier-list
+   form.  */
+int
+re_search (struct re_pattern_buffer *bufp, const char *string, int length,
+	   int start, int range, struct re_registers *regs)
+{
+  return re_search_stub (bufp, string, length, start, range, length, regs, 0);
+}
+#ifdef _LIBC
+weak_alias (__re_search, re_search)
+#endif
+
+/* Two-string variant of re_match: forwards to re_search_2_stub with a
+   search range of 0 and RET_LEN set, so the match is attempted at START
+   only and the match length is reported.  Written as a prototype-style
+   definition instead of the obsolescent identifier-list form.  */
+int
+re_match_2 (struct re_pattern_buffer *bufp, const char *string1, int length1,
+	    const char *string2, int length2, int start,
+	    struct re_registers *regs, int stop)
+{
+  return re_search_2_stub (bufp, string1, length1, string2, length2,
+			   start, 0, regs, stop, 1);
+}
+#ifdef _LIBC
+weak_alias (__re_match_2, re_match_2)
+#endif
+
+/* Two-string variant of re_search: forwards to re_search_2_stub with the
+   caller's RANGE and RET_LEN clear, so the position of the match is
+   reported.  Written as a prototype-style definition instead of the
+   obsolescent identifier-list form.  */
+int
+re_search_2 (struct re_pattern_buffer *bufp, const char *string1, int length1,
+	     const char *string2, int length2, int start, int range,
+	     struct re_registers *regs, int stop)
+{
+  return re_search_2_stub (bufp, string1, length1, string2, length2,
+			   start, range, regs, stop, 0);
+}
+#ifdef _LIBC
+weak_alias (__re_search_2, re_search_2)
+#endif
+
+/* Common implementation of re_match_2 and re_search_2.
+
+   Presents STRING1 (LENGTH1 bytes) followed by STRING2 (LENGTH2 bytes) to
+   re_search_stub as one subject.  When both parts are non-empty they are
+   copied into a temporary buffer that is released before returning;
+   otherwise the non-empty part (or STRING1) is used directly.
+
+   Returns -2 if a length or STOP is negative, if the combined length does
+   not fit in an int, or if the temporary buffer cannot be allocated;
+   otherwise returns the result of re_search_stub.  */
+static int
+re_search_2_stub (struct re_pattern_buffer *bufp, const char *string1,
+		  int length1, const char *string2, int length2, int start,
+		  int range, struct re_registers *regs, int stop, int ret_len)
+{
+  const char *str;
+  int rval;
+  int len;
+  unsigned int total;
+  int free_str = 0;
+
+  if (BE (length1 < 0 || length2 < 0 || stop < 0, 0))
+    return -2;
+
+  /* Both lengths are known to be non-negative here, so add them as
+     unsigned values and refuse totals above the int range instead of
+     letting a signed addition overflow (undefined behaviour).
+     NOTE(review): assumes the largest int is half of the largest unsigned
+     int, as on the usual two's-complement targets.  */
+  total = (unsigned int) length1 + (unsigned int) length2;
+  if (BE (total > ((unsigned int) -1) / 2, 0))
+    return -2;
+  len = (int) total;
+
+  /* Concatenate the strings.  */
+  if (length2 > 0)
+    {
+      if (length1 > 0)
+	{
+	  char *s = re_malloc (char, len);
+
+	  if (BE (s == NULL, 0))
+	    return -2;
+#ifdef _LIBC
+	  memcpy (__mempcpy (s, string1, length1), string2, length2);
+#else
+	  memcpy (s, string1, length1);
+	  memcpy (s + length1, string2, length2);
+#endif
+	  str = s;
+	  free_str = 1;
+	}
+      else
+	str = string2;
+    }
+  else
+    str = string1;
+
+  rval = re_search_stub (bufp, str, len, start, range, stop, regs,
+			 ret_len);
+  /* Only the temporary concatenation buffer is owned by this function.  */
+  if (free_str)
+    re_free ((char *) str);
+  return rval;
+}
+
+/* The parameters have the same meaning as those of re_search.
+   Additional parameters:
+   If RET_LEN is nonzero the length of the match is returned (re_match style);
+   otherwise the position of the match is returned.  */
+
+/* Common implementation of the GNU entry points.
+
+   Validates START, clamps RANGE so that START + RANGE stays inside
+   [0, LENGTH], derives the execution flags from BUFP, allocates a
+   temporary regmatch_t array, runs re_search_internal and, when REGS is
+   wanted, copies the offsets back with re_copy_regs.
+
+   Returns -1 when START is out of range or the search reports anything
+   other than REG_NOERROR, -2 when memory for the registers cannot be
+   obtained, and otherwise the match length (RET_LEN nonzero) or the match
+   position (RET_LEN zero).  */
+static int
+re_search_stub (struct re_pattern_buffer *bufp, const char *string, int length,
+		int start, int range, int stop, struct re_registers *regs,
+		int ret_len)
+{
+  reg_errcode_t result;
+  regmatch_t *pmatch;
+  int nregs, rval;
+  int eflags = 0;
+  re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
+
+  /* Check for out-of-range.  */
+  if (BE (start < 0 || start > length, 0))
+    return -1;
+  /* Clamp RANGE without ever forming START + RANGE: that sum can overflow
+     a signed int for a large RANGE, and a wrapped (negative) sum would
+     turn a forward search into a backward one.  Since
+     0 <= START <= LENGTH, both LENGTH - START and -START are safe.  */
+  if (BE (range > length - start, 0))
+    range = length - start;
+  else if (BE (range < -start, 0))
+    range = -start;
+
+  __libc_lock_lock (dfa->lock);
+
+  eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
+  eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
+
+  /* Compile fastmap if we haven't yet.  */
+  if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate)
+    re_compile_fastmap (bufp);
+
+  /* No sub-expression data is reported for a no_sub pattern.  */
+  if (BE (bufp->no_sub, 0))
+    regs = NULL;
+
+  /* We need at least 1 register.  */
+  if (regs == NULL)
+    nregs = 1;
+  else if (BE (bufp->regs_allocated == REGS_FIXED &&
+	       regs->num_regs < bufp->re_nsub + 1, 0))
+    {
+      /* Fixed-size caller arrays that are too small: report only as many
+	 registers as they can hold.  */
+      nregs = regs->num_regs;
+      if (BE (nregs < 1, 0))
+	{
+	  /* Nothing can be copied to regs.  */
+	  regs = NULL;
+	  nregs = 1;
+	}
+    }
+  else
+    nregs = bufp->re_nsub + 1;
+  pmatch = re_malloc (regmatch_t, nregs);
+  if (BE (pmatch == NULL, 0))
+    {
+      rval = -2;
+      goto out;
+    }
+
+  result = re_search_internal (bufp, string, length, start, range, stop,
+			       nregs, pmatch, eflags);
+
+  rval = 0;
+
+  /* I hope we needn't fill the regs with -1's when no match was found.  */
+  if (result != REG_NOERROR)
+    rval = -1;
+  else if (regs != NULL)
+    {
+      /* If caller wants register contents data back, copy them.  */
+      bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
+					   bufp->regs_allocated);
+      if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
+	rval = -2;
+    }
+
+  if (BE (rval == 0, 1))
+    {
+      if (ret_len)
+	{
+	  /* re_match style: the match must begin exactly at START.  */
+	  assert (pmatch[0].rm_so == start);
+	  rval = pmatch[0].rm_eo - start;
+	}
+      else
+	rval = pmatch[0].rm_so;
+    }
+  re_free (pmatch);
+ out:
+  __libc_lock_unlock (dfa->lock);
+  return rval;
+}
+
+/* Copy the NREGS offsets in PMATCH into REGS, allocating or growing the
+   REGS arrays according to REGS_ALLOCATED.
+
+   REGS_UNALLOCATED  both arrays are freshly allocated with NREGS + 1 slots.
+   REGS_REALLOCATE   the arrays are grown if they hold fewer than NREGS + 1.
+   REGS_FIXED        the arrays are used as they are (asserted big enough).
+
+   Slots past NREGS up to REGS->num_regs are filled with -1.
+
+   Returns the allocation state to store back into the pattern buffer:
+   REGS_REALLOCATE or REGS_FIXED on success, REGS_UNALLOCATED when memory
+   could not be obtained.  On failure nothing is leaked and REGS never
+   points at released memory: a fresh allocation that fails leaves both
+   pointers NULL, and a failed grow leaves arrays that are still valid for
+   the unchanged REGS->num_regs.  */
+static unsigned
+re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, int nregs,
+	      int regs_allocated)
+{
+  int rval = REGS_REALLOCATE;
+  int i;
+  /* We need one extra element beyond `num_regs' for the `-1' marker GNU code
+     uses.  */
+  int need_regs = nregs + 1;
+
+  /* Have the register data arrays been allocated?  */
+  if (regs_allocated == REGS_UNALLOCATED)
+    { /* No.  So allocate them with malloc.  */
+      regs->start = re_malloc (regoff_t, need_regs);
+      if (BE (regs->start == NULL, 0))
+	{
+	  regs->end = NULL;
+	  return REGS_UNALLOCATED;
+	}
+      regs->end = re_malloc (regoff_t, need_regs);
+      if (BE (regs->end == NULL, 0))
+	{
+	  /* Do not leak the array that was obtained.  */
+	  re_free (regs->start);
+	  regs->start = NULL;
+	  return REGS_UNALLOCATED;
+	}
+      regs->num_regs = need_regs;
+    }
+  else if (regs_allocated == REGS_REALLOCATE)
+    { /* Yes.  If we need more elements than were already
+	 allocated, reallocate them.  If we need fewer, just
+	 leave it alone.  */
+      if (BE (need_regs > regs->num_regs, 0))
+	{
+	  regoff_t *new_start;
+	  regoff_t *new_end;
+
+	  new_start = re_realloc (regs->start, regoff_t, need_regs);
+	  if (BE (new_start == NULL, 0))
+	    return REGS_UNALLOCATED;
+	  /* The old block may have been released by the reallocation, so
+	     publish the new pointer before anything else can fail.  */
+	  regs->start = new_start;
+
+	  new_end = re_realloc (regs->end, regoff_t, need_regs);
+	  if (BE (new_end == NULL, 0))
+	    return REGS_UNALLOCATED;
+	  regs->end = new_end;
+	  regs->num_regs = need_regs;
+	}
+    }
+  else
+    {
+      assert (regs_allocated == REGS_FIXED);
+      /* This function may not be called with REGS_FIXED and nregs too big.  */
+      assert (regs->num_regs >= nregs);
+      rval = REGS_FIXED;
+    }
+
+  /* Copy the regs.  */
+  for (i = 0; i < nregs; ++i)
+    {
+      regs->start[i] = pmatch[i].rm_so;
+      regs->end[i] = pmatch[i].rm_eo;
+    }
+  /* Mark every remaining slot as unused.  */
+  for ( ; i < regs->num_regs; ++i)
+    regs->start[i] = regs->end[i] = -1;
+
+  return rval;
+}
+
+/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
+   ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
+   this memory for recording register information.  STARTS and ENDS
+   must be allocated using the malloc library routine, and must each
+   be at least NUM_REGS * sizeof (regoff_t) bytes long.
+
+   If NUM_REGS == 0, then subsequent matches should allocate their own
+   register data.
+
+   Unless this function is called, the first search or match using
+   PATTERN_BUFFER will allocate its own register data, without
+   freeing the old data.  */
+
+/* Install caller-provided register arrays.  A nonzero NUM_REGS records
+   STARTS/ENDS in REGS and marks BUFP as REGS_REALLOCATE; a zero NUM_REGS
+   clears REGS and marks BUFP as REGS_UNALLOCATED.  Written as a
+   prototype-style definition instead of the obsolescent identifier-list
+   form.  */
+void
+re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs,
+		  unsigned num_regs, regoff_t *starts, regoff_t *ends)
+{
+  if (num_regs)
+    {
+      bufp->regs_allocated = REGS_REALLOCATE;
+      regs->num_regs = num_regs;
+      regs->start = starts;
+      regs->end = ends;
+    }
+  else
+    {
+      bufp->regs_allocated = REGS_UNALLOCATED;
+      regs->num_regs = 0;
+      regs->start = regs->end = (regoff_t *) 0;
+    }
+}
+#ifdef _LIBC
+weak_alias (__re_set_registers, re_set_registers)
+#endif
+
+/* Entry points compatible with 4.2 BSD regex library.  We don't define
+   them unless specifically requested.  */
+
+#if defined _REGEX_RE_COMP || defined _LIBC
+/* BSD-style matcher: run regexec on S against the pattern held in
+   re_comp_buf, with no registers and no execution flags.  Returns 1 when
+   regexec reports a match (returns 0) and 0 otherwise.  Written as a
+   prototype-style definition instead of the obsolescent identifier-list
+   form.  */
+int
+# ifdef _LIBC
+weak_function
+# endif
+re_exec (const char *s)
+{
+  return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
+}
+#endif /* _REGEX_RE_COMP */
+
+/* Internal entry point.  */
+
+/* Searches for a compiled pattern PREG in the string STRING, whose
+   length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
+   meanings as in regexec.  START and RANGE have the same meanings
+   as in re_search.
+   Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
+   otherwise return the error code.
+   Note: We assume front end functions already check ranges.
+   (START + RANGE >= 0 && START + RANGE <= LENGTH)  */
+
+static reg_errcode_t
+re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
+		    eflags)
+    const regex_t *preg;
+    const char *string;
+    int length, start, range, stop, eflags;
+    size_t nmatch;
+    regmatch_t pmatch[];
+{
+  /* Outline: set up the match context, then walk candidate start positions
+     from START towards START + RANGE (using the fastmap to skip hopeless
+     positions when one is usable), run check_matching at each candidate,
+     and on success fill PMATCH.  Every exit after the context has been set
+     up goes through the free_return label, which releases it.  */
+  reg_errcode_t err;
+  const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
+  int left_lim, right_lim, incr;
+  int fl_longest_match, match_first, match_kind, match_last = -1;
+  int extra_nmatch;
+  int sb, ch;
+#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
+  re_match_context_t mctx = { .dfa = dfa };
+#else
+  re_match_context_t mctx;
+#endif
+  /* The fastmap is used only if it exists, is accurate, the search
+     actually moves (RANGE != 0) and the pattern cannot match empty.  */
+  char *fastmap = (preg->fastmap != NULL && preg->fastmap_accurate
+		   && range && !preg->can_be_null) ? preg->fastmap : NULL;
+  RE_TRANSLATE_TYPE t = preg->translate;
+
+#if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
+  memset (&mctx, '\0', sizeof (re_match_context_t));
+  mctx.dfa = dfa;
+#endif
+
+  /* Registers beyond the pattern's re_nsub + 1 are only reset to -1 at the
+     end; they are excluded from NMATCH for the rest of the function.  */
+  extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
+  nmatch -= extra_nmatch;
+
+  /* Check whether the DFA has not been compiled.  */
+  if (BE (preg->used == 0 || dfa->init_state == NULL
+	  || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
+	  || dfa->init_state_begbuf == NULL, 0))
+    return REG_NOMATCH;
+
+#ifdef DEBUG
+  /* We assume front-end functions already check them.  */
+  assert (start + range >= 0 && start + range <= length);
+#endif
+
+  /* If initial states with non-begbuf contexts have no elements,
+     the regex must be anchored.  If preg->newline_anchor is set,
+     we'll never use init_state_nl, so do not check it.  */
+  if (dfa->init_state->nodes.nelem == 0
+      && dfa->init_state_word->nodes.nelem == 0
+      && (dfa->init_state_nl->nodes.nelem == 0
+	  || !preg->newline_anchor))
+    {
+      if (start != 0 && start + range != 0)
+        return REG_NOMATCH;
+      start = range = 0;
+    }
+
+  /* We must check the longest matching, if nmatch > 0.  */
+  fl_longest_match = (nmatch != 0 || dfa->nbackref);
+
+  err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
+			    preg->translate, preg->syntax & RE_ICASE, dfa);
+  if (BE (err != REG_NOERROR, 0))
+    goto free_return;
+  mctx.input.stop = stop;
+  mctx.input.raw_stop = stop;
+  mctx.input.newline_anchor = preg->newline_anchor;
+
+  err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
+  if (BE (err != REG_NOERROR, 0))
+    goto free_return;
+
+  /* We will log all the DFA states through which the dfa pass,
+     if nmatch > 1, or this dfa has "multibyte node", which is a
+     back-reference or a node which can accept multibyte character or
+     multi character collating element.  */
+  if (nmatch > 1 || dfa->has_mb_node)
+    {
+      mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
+      if (BE (mctx.state_log == NULL, 0))
+	{
+	  err = REG_ESPACE;
+	  goto free_return;
+	}
+    }
+  else
+    mctx.state_log = NULL;
+
+  match_first = start;
+  mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
+			   : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
+
+  /* Check incrementally whether or not the input string matches.  */
+  incr = (range < 0) ? -1 : 1;
+  left_lim = (range < 0) ? start + range : start;
+  right_lim = (range < 0) ? start : start + range;
+  sb = dfa->mb_cur_max == 1;
+  /* MATCH_KIND selects the start-position scan used below.  Without a
+     fastmap it is 8.  Otherwise it is a bit set: 4 when single-byte or
+     neither RE_ICASE nor a translate table is in effect, 2 for a forward
+     search (RANGE >= 0), 1 when a translate table is present.  */
+  match_kind =
+    (fastmap
+     ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
+	| (range >= 0 ? 2 : 0)
+	| (t != NULL ? 1 : 0))
+     : 8);
+
+  for (;; match_first += incr)
+    {
+      /* Running out of candidate positions means no match.  */
+      err = REG_NOMATCH;
+      if (match_first < left_lim || right_lim < match_first)
+	goto free_return;
+
+      /* Advance as rapidly as possible through the string, until we
+	 find a plausible place to start matching.  This may be done
+	 with varying efficiency, so there are various possibilities:
+	 only the most common of them are specialized, in order to
+	 save on code size.  We use a switch statement for speed.  */
+      switch (match_kind)
+	{
+	case 8:
+	  /* No fastmap.  */
+	  break;
+
+	case 7:
+	  /* Fastmap with single-byte translation, match forward.  */
+	  while (BE (match_first < right_lim, 1)
+		 && !fastmap[t[(unsigned char) string[match_first]]])
+	    ++match_first;
+	  goto forward_match_found_start_or_reached_end;
+
+	case 6:
+	  /* Fastmap without translation, match forward.  */
+	  while (BE (match_first < right_lim, 1)
+		 && !fastmap[(unsigned char) string[match_first]])
+	    ++match_first;
+
+	forward_match_found_start_or_reached_end:
+	  /* The scan loops above stop before testing RIGHT_LIM itself, so
+	     test that last position here; a position at or past LENGTH is
+	     treated as the byte 0.  */
+	  if (BE (match_first == right_lim, 0))
+	    {
+	      ch = match_first >= length
+		       ? 0 : (unsigned char) string[match_first];
+	      if (!fastmap[t ? t[ch] : ch])
+		goto free_return;
+	    }
+	  break;
+
+	case 4:
+	case 5:
+	  /* Fastmap without multi-byte translation, match backwards.  */
+	  while (match_first >= left_lim)
+	    {
+	      ch = match_first >= length
+		       ? 0 : (unsigned char) string[match_first];
+	      if (fastmap[t ? t[ch] : ch])
+		break;
+	      --match_first;
+	    }
+	  if (match_first < left_lim)
+	    goto free_return;
+	  break;
+
+	default:
+	  /* In this case, we can't determine easily the current byte,
+	     since it might be a component byte of a multibyte
+	     character.  Then we use the constructed buffer instead.  */
+	  for (;;)
+	    {
+	      /* If MATCH_FIRST is out of the valid range, reconstruct the
+		 buffers.  */
+	      unsigned int offset = match_first - mctx.input.raw_mbs_idx;
+	      if (BE (offset >= (unsigned int) mctx.input.valid_raw_len, 0))
+		{
+		  err = re_string_reconstruct (&mctx.input, match_first,
+					       eflags);
+		  if (BE (err != REG_NOERROR, 0))
+		    goto free_return;
+
+		  offset = match_first - mctx.input.raw_mbs_idx;
+		}
+	      /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
+		 Note that MATCH_FIRST must not be smaller than 0.  */
+	      ch = (match_first >= length
+		    ? 0 : re_string_byte_at (&mctx.input, offset));
+	      if (fastmap[ch])
+		break;
+	      match_first += incr;
+	      if (match_first < left_lim || match_first > right_lim)
+	        {
+	          err = REG_NOMATCH;
+	          goto free_return;
+	        }
+	    }
+	  break;
+	}
+
+      /* Reconstruct the buffers so that the matcher can assume that
+	 the matching starts from the beginning of the buffer.  */
+      err = re_string_reconstruct (&mctx.input, match_first, eflags);
+      if (BE (err != REG_NOERROR, 0))
+	goto free_return;
+
+#ifdef RE_ENABLE_I18N
+     /* Don't consider this char as a possible match start if it is part,
+	yet isn't the head, of a multibyte character.  */
+      if (!sb && !re_string_first_byte (&mctx.input, 0))
+	continue;
+#endif
+
+      /* It seems to be appropriate one, then use the matcher.  */
+      /* We assume that the matching starts from 0.  */
+      mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
+      match_last = check_matching (&mctx, fl_longest_match,
+				   range >= 0 ? &match_first : NULL);
+      if (match_last != -1)
+	{
+	  /* -2 is check_matching's error indication.  */
+	  if (BE (match_last == -2, 0))
+	    {
+	      err = REG_ESPACE;
+	      goto free_return;
+	    }
+	  else
+	    {
+	      mctx.match_last = match_last;
+	      if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
+		{
+		  re_dfastate_t *pstate = mctx.state_log[match_last];
+		  mctx.last_node = check_halt_state_context (&mctx, pstate,
+							     match_last);
+		}
+	      if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
+		  || dfa->nbackref)
+		{
+		  err = prune_impossible_nodes (&mctx);
+		  if (err == REG_NOERROR)
+		    break;
+		  if (BE (err != REG_NOMATCH, 0))
+		    goto free_return;
+		  /* Pruning rejected this candidate; try the next start.  */
+		  match_last = -1;
+		}
+	      else
+		break; /* We found a match.  */
+	    }
+	}
+
+      match_ctx_clean (&mctx);
+    }
+
+#ifdef DEBUG
+  assert (match_last != -1);
+  assert (err == REG_NOERROR);
+#endif
+
+  /* Set pmatch[] if we need.  */
+  if (nmatch > 0)
+    {
+      int reg_idx;
+
+      /* Initialize registers.  */
+      for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
+	pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
+
+      /* Set the points where matching start/end.  */
+      pmatch[0].rm_so = 0;
+      pmatch[0].rm_eo = mctx.match_last;
+
+      if (!preg->no_sub && nmatch > 1)
+	{
+	  err = set_regs (preg, &mctx, nmatch, pmatch,
+			  dfa->has_plural_match && dfa->nbackref > 0);
+	  if (BE (err != REG_NOERROR, 0))
+	    goto free_return;
+	}
+
+      /* At last, add the offset to each register, since we slid
+	 the buffers so that we could assume that the matching starts
+	 from 0.  */
+      for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
+	if (pmatch[reg_idx].rm_so != -1)
+	  {
+#ifdef RE_ENABLE_I18N
+	    if (BE (mctx.input.offsets_needed != 0, 0))
+	      {
+		pmatch[reg_idx].rm_so =
+		  (pmatch[reg_idx].rm_so == mctx.input.valid_len
+		   ? mctx.input.valid_raw_len
+		   : mctx.input.offsets[pmatch[reg_idx].rm_so]);
+		pmatch[reg_idx].rm_eo =
+		  (pmatch[reg_idx].rm_eo == mctx.input.valid_len
+		   ? mctx.input.valid_raw_len
+		   : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
+	      }
+#else
+	    assert (mctx.input.offsets_needed == 0);
+#endif
+	    pmatch[reg_idx].rm_so += match_first;
+	    pmatch[reg_idx].rm_eo += match_first;
+	  }
+      /* Registers the pattern has no group for are reported as unset.  */
+      for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
+	{
+	  pmatch[nmatch + reg_idx].rm_so = -1;
+	  pmatch[nmatch + reg_idx].rm_eo = -1;
+	}
+
+      /* When the DFA carries a subexp_map, a group whose map entry differs
+	 from its own index takes its offsets from the mapped group.  */
+      if (dfa->subexp_map)
+        for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
+          if (dfa->subexp_map[reg_idx] != reg_idx)
+            {
+              pmatch[reg_idx + 1].rm_so
+                = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
+              pmatch[reg_idx + 1].rm_eo
+                = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
+            }
+    }
+
+ free_return:
+  /* Common cleanup for success and failure alike.  */
+  re_free (mctx.state_log);
+  if (dfa->nbackref)
+    match_ctx_free (&mctx);
+  re_string_destruct (&mctx.input);
+  return err;
+}
+
+/* Sift the state log of MCTX backwards from the recorded halt node
+   (MCTX->last_node at index MCTX->match_last) and, on success, replace
+   MCTX->state_log with the sifted array.
+
+   With back-references, a sift that leaves nothing at index 0 causes the
+   match end to be moved back to the previous logged halting state and the
+   sift to be retried.  Without back-references there is nothing to retry,
+   so an empty result at index 0 is reported as REG_NOMATCH; previously
+   that case was accepted and the caller went on with a state log that has
+   no state at the start position.
+
+   Returns REG_NOERROR on success (MCTX->state_log, last_node and
+   match_last updated), REG_NOMATCH when no usable path remains,
+   REG_ESPACE when a work array cannot be allocated, or the error reported
+   by a helper.  The temporary arrays are released on every path.  */
+static reg_errcode_t
+prune_impossible_nodes (re_match_context_t *mctx)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  int halt_node, match_last;
+  reg_errcode_t ret;
+  re_dfastate_t **sifted_states;
+  re_dfastate_t **lim_states = NULL;
+  re_sift_context_t sctx;
+#ifdef DEBUG
+  assert (mctx->state_log != NULL);
+#endif
+  match_last = mctx->match_last;
+  halt_node = mctx->last_node;
+  sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
+  if (BE (sifted_states == NULL, 0))
+    {
+      ret = REG_ESPACE;
+      goto free_return;
+    }
+  if (dfa->nbackref)
+    {
+      lim_states = re_malloc (re_dfastate_t *, match_last + 1);
+      if (BE (lim_states == NULL, 0))
+	{
+	  ret = REG_ESPACE;
+	  goto free_return;
+	}
+      while (1)
+	{
+	  memset (lim_states, '\0',
+		  sizeof (re_dfastate_t *) * (match_last + 1));
+	  sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
+			 match_last);
+	  ret = sift_states_backward (mctx, &sctx);
+	  re_node_set_free (&sctx.limits);
+	  if (BE (ret != REG_NOERROR, 0))
+	      goto free_return;
+	  /* Something survived at the start position: this end is viable.  */
+	  if (sifted_states[0] != NULL || lim_states[0] != NULL)
+	    break;
+	  /* Otherwise step back to the previous logged halting state.  */
+	  do
+	    {
+	      --match_last;
+	      if (match_last < 0)
+		{
+		  ret = REG_NOMATCH;
+		  goto free_return;
+		}
+	    } while (mctx->state_log[match_last] == NULL
+		     || !mctx->state_log[match_last]->halt);
+	  halt_node = check_halt_state_context (mctx,
+						mctx->state_log[match_last],
+						match_last);
+	}
+      ret = merge_state_array (dfa, sifted_states, lim_states,
+			       match_last + 1);
+      re_free (lim_states);
+      lim_states = NULL;
+      if (BE (ret != REG_NOERROR, 0))
+	goto free_return;
+    }
+  else
+    {
+      sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
+      ret = sift_states_backward (mctx, &sctx);
+      re_node_set_free (&sctx.limits);
+      if (BE (ret != REG_NOERROR, 0))
+	goto free_return;
+      /* Same viability test as in the back-reference branch: no state at
+	 the start position means this candidate cannot match.  */
+      if (sifted_states[0] == NULL)
+	{
+	  ret = REG_NOMATCH;
+	  goto free_return;
+	}
+    }
+  /* Hand the sifted array over to the context; clearing the local pointer
+     keeps the cleanup below from freeing it.  */
+  re_free (mctx->state_log);
+  mctx->state_log = sifted_states;
+  sifted_states = NULL;
+  mctx->last_node = halt_node;
+  mctx->match_last = match_last;
+  ret = REG_NOERROR;
+ free_return:
+  re_free (sifted_states);
+  re_free (lim_states);
+  return ret;
+}
+
+/* Acquire an initial state and return it.
+   We must select appropriate initial state depending on the context,
+   since initial states may have constraints like "\<", "^", etc..  */
+
+/* Pick the initial DFA state that fits the context just before index IDX
+   of the input held in MCTX.  ERR is only passed on to
+   re_acquire_state_context, which is used for the begbuf-without-newline
+   context.  */
+static inline re_dfastate_t *
+__attribute ((always_inline)) internal_function
+acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
+			    int idx)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  unsigned int context;
+
+  /* An unconstrained initial state is the same in every context.  */
+  if (!dfa->init_state->has_constraint)
+    return dfa->init_state;
+
+  context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
+
+  if (IS_WORD_CONTEXT (context))
+    return dfa->init_state_word;
+
+  if (IS_ORDINARY_CONTEXT (context))
+    return dfa->init_state;
+
+  /* Newline context, with or without the beginning-of-buffer flag.  */
+  if (IS_NEWLINE_CONTEXT (context))
+    return (IS_BEGBUF_CONTEXT (context)
+	    ? dfa->init_state_begbuf
+	    : dfa->init_state_nl);
+
+  /* Beginning of buffer only: relatively rare, so computed on demand.  */
+  if (IS_BEGBUF_CONTEXT (context))
+    return re_acquire_state_context (err, dfa,
+				     dfa->init_state->entrance_nodes,
+				     context);
+
+  /* Must not happen?  */
+  return dfa->init_state;
+}
+
+/* Check whether the regular expression matches the input string INPUT,
+   and return the index where the match ends, -1 if there is no match,
+   or -2 in case of an error.
+   FL_LONGEST_MATCH means we want the POSIX longest match.
+   If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
+   next place where we may want to try matching.
+   Note that the matcher assumes that the matching starts from the current
+   index of the buffer.  */
+
+static int
+internal_function
+check_matching (re_match_context_t *mctx, int fl_longest_match,
+		int *p_match_first)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  reg_errcode_t err;
+  int match = 0;
+  int match_last = -1;
+  int cur_str_idx = re_string_cur_idx (&mctx->input);
+  re_dfastate_t *cur_state;
+  int at_init_state = p_match_first != NULL;
+  int next_start_idx = cur_str_idx;
+
+  err = REG_NOERROR;
+  cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
+  /* An initial state must not be NULL (invalid).  */
+  if (BE (cur_state == NULL, 0))
+    {
+      assert (err == REG_ESPACE);
+      return -2;
+    }
+
+  if (mctx->state_log != NULL)
+    {
+      mctx->state_log[cur_str_idx] = cur_state;
+
+      /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
+	 later.  E.g. Processing back references.  */
+      if (BE (dfa->nbackref, 0))
+	{
+	  at_init_state = 0;
+	  err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
+	  if (BE (err != REG_NOERROR, 0))
+	    return err;
+
+	  if (cur_state->has_backref)
+	    {
+	      err = transit_state_bkref (mctx, &cur_state->nodes);
+	      if (BE (err != REG_NOERROR, 0))
+	        return err;
+	    }
+	}
+    }
+
+  /* If the RE accepts NULL string.  */
+  if (BE (cur_state->halt, 0))
+    {
+      if (!cur_state->has_constraint
+	  || check_halt_state_context (mctx, cur_state, cur_str_idx))
+	{
+	  if (!fl_longest_match)
+	    return cur_str_idx;
+	  else
+	    {
+	      match_last = cur_str_idx;
+	      match = 1;
+	    }
+	}
+    }
+
+  while (!re_string_eoi (&mctx->input))
+    {
+      re_dfastate_t *old_state = cur_state;
+      int next_char_idx = re_string_cur_idx (&mctx->input) + 1;
+
+      if (BE (next_char_idx >= mctx->input.bufs_len, 0)
+          || (BE (next_char_idx >= mctx->input.valid_len, 0)
+              && mctx->input.valid_len < mctx->input.len))
+        {
+          err = extend_buffers (mctx);
+          if (BE (err != REG_NOERROR, 0))
+	    {
+	      assert (err == REG_ESPACE);
+	      return -2;
+	    }
+        }
+
+      cur_state = transit_state (&err, mctx, cur_state);
+      if (mctx->state_log != NULL)
+	cur_state = merge_state_with_log (&err, mctx, cur_state);
+
+      if (cur_state == NULL)
+	{
+	  /* Reached the invalid state or an error.  Try to recover a valid
+	     state using the state log, if available and if we have not
+	     already found a valid (even if not the longest) match.  */
+	  if (BE (err != REG_NOERROR, 0))
+	    return -2;
+
+	  if (mctx->state_log == NULL
+	      || (match && !fl_longest_match)
+	      || (cur_state = find_recover_state (&err, mctx)) == NULL)
+	    break;
+	}
+
+      if (BE (at_init_state, 0))
+	{
+	  if (old_state == cur_state)
+	    next_start_idx = next_char_idx;
+	  else
+	    at_init_state = 0;
+	}
+
+      if (cur_state->halt)
+	{
+	  /* Reached a halt state.
+	     Check the halt state can satisfy the current context.  */
+	  if (!cur_state->has_constraint
+	      || check_halt_state_context (mctx, cur_state,
+					   re_string_cur_idx (&mctx->input)))
+	    {
+	      /* We found an appropriate halt state.  */
+	      match_last = re_string_cur_idx (&mctx->input);
+	      match = 1;
+
+	      /* We found a match, do not modify match_first below.  */
+	      p_match_first = NULL;
+	      if (!fl_longest_match)
+		break;
+	    }
+	}
+    }
+
+  if (p_match_first)
+    *p_match_first += next_start_idx;
+
+  return match_last;
+}
+
+/* Return 1 if NODE is an END_OF_RE node whose constraint (if any) is
+   satisfied by CONTEXT, 0 otherwise.  */
+
+static int
+internal_function
+check_halt_node_context (const re_dfa_t *dfa, int node, unsigned int context)
+{
+  unsigned int constraint;
+
+  /* Only END_OF_RE nodes can terminate a match.  */
+  if (dfa->nodes[node].type != END_OF_RE)
+    return 0;
+
+  constraint = dfa->nodes[node].constraint;
+  if (constraint == 0)
+    return 1;
+
+  return NOT_SATISFY_NEXT_CONSTRAINT (constraint, context) ? 0 : 1;
+}
+
+/* Check whether the halt state STATE matches the context at string index
+   IDX.  Return the first node of STATE that is a halt node matching the
+   context, or 0 if there is none.  */
+
+static int
+internal_function
+check_halt_state_context (const re_match_context_t *mctx,
+			  const re_dfastate_t *state, int idx)
+{
+  unsigned int context;
+  int pos = 0;
+#ifdef DEBUG
+  assert (state->halt);
+#endif
+  context = re_string_context_at (&mctx->input, idx, mctx->eflags);
+  while (pos < state->nodes.nelem)
+    {
+      int node = state->nodes.elems[pos];
+      if (check_halt_node_context (mctx->dfa, node, context))
+	return node;
+      ++pos;
+    }
+  return 0;
+}
+
+/* Compute the next node to which the "NFA" transits from NODE (the "NFA"
+   is the NFA corresponding to the DFA), following the states recorded in
+   MCTX->state_log.
+
+   NREGS and REGS are the subexpression registers recorded so far.
+   *PIDX is the current string index; it is advanced here when NODE
+   consumes input.  EPS_VIA_NODES collects the nodes passed through
+   without consuming input and is emptied after a consuming step.
+   FS, if not NULL, is the fail stack on which alternative epsilon
+   transitions are saved for backtracking.
+
+   Return the destination node, -1 if there is no valid destination, or
+   -2 if a node set insertion or fail stack push fails.  */
+
+static int
+internal_function
+proceed_next_node (const re_match_context_t *mctx, int nregs, regmatch_t *regs,
+		   int *pidx, int node, re_node_set *eps_via_nodes,
+		   struct re_fail_stack_t *fs)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  int i, err;
+  if (IS_EPSILON_NODE (dfa->nodes[node].type))
+    {
+      /* Destinations are only valid if they belong to the logged state
+	 at the current string index.  */
+      re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
+      re_node_set *edests = &dfa->edests[node];
+      int dest_node;
+      err = re_node_set_insert (eps_via_nodes, node);
+      if (BE (err < 0, 0))
+	return -2;
+      /* Pick up a valid destination, or return -1 if none is found.  */
+      for (dest_node = -1, i = 0; i < edests->nelem; ++i)
+	{
+	  int candidate = edests->elems[i];
+	  if (!re_node_set_contains (cur_nodes, candidate))
+	    continue;
+          if (dest_node == -1)
+	    dest_node = candidate;
+
+          else
+	    {
+	      /* In order to avoid infinite loop like "(a*)*", return the second
+	         epsilon-transition if the first was already considered.  */
+	      if (re_node_set_contains (eps_via_nodes, dest_node))
+	        return candidate;
+
+	      /* Otherwise, push the second epsilon-transition on the fail stack.  */
+	      else if (fs != NULL
+		       && push_fail_stack (fs, *pidx, candidate, nregs, regs,
+				           eps_via_nodes))
+		return -2;
+
+	      /* We know we are going to exit.  */
+	      break;
+	    }
+	}
+      return dest_node;
+    }
+  else
+    {
+      /* Number of input bytes NODE accepts at *PIDX; 0 means "not yet
+	 known" and is resolved by check_node_accept below.  */
+      int naccepted = 0;
+      re_token_type_t type = dfa->nodes[node].type;
+
+#ifdef RE_ENABLE_I18N
+      if (dfa->nodes[node].accept_mb)
+	naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
+      else
+#endif /* RE_ENABLE_I18N */
+      if (type == OP_BACK_REF)
+	{
+	  /* A back reference accepts as many bytes as the referenced
+	     subexpression matched.
+	     NOTE(review): regs[subexp_idx] is read without comparing
+	     subexp_idx against NREGS - confirm that callers always pass
+	     enough registers.  */
+	  int subexp_idx = dfa->nodes[node].opr.idx + 1;
+	  naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
+	  if (fs != NULL)
+	    {
+	      /* When backtracking, the referenced subexpression must be
+		 completely recorded and its text must reappear at *PIDX.  */
+	      if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
+		return -1;
+	      else if (naccepted)
+		{
+		  char *buf = (char *) re_string_get_buffer (&mctx->input);
+		  if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
+			      naccepted) != 0)
+		    return -1;
+		}
+	    }
+
+	  if (naccepted == 0)
+	    {
+	      /* An empty back reference behaves like an epsilon
+		 transition to its first epsilon destination.  */
+	      int dest_node;
+	      err = re_node_set_insert (eps_via_nodes, node);
+	      if (BE (err < 0, 0))
+		return -2;
+	      dest_node = dfa->edests[node].elems[0];
+	      if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
+					dest_node))
+		return dest_node;
+	    }
+	}
+
+      if (naccepted != 0
+	  || check_node_accept (mctx, dfa->nodes + node, *pidx))
+	{
+	  int dest_node = dfa->nexts[node];
+	  /* A plain accepting node consumes exactly one position.  */
+	  *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
+	  /* When backtracking, reject steps that run past the match end
+	     or that the state log does not confirm.  */
+	  if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
+		     || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
+					       dest_node)))
+	    return -1;
+	  re_node_set_empty (eps_via_nodes);
+	  return dest_node;
+	}
+    }
+  return -1;
+}
+
+/* Push a backtracking point onto the fail stack FS: string index
+   STR_IDX, node DEST_NODE, a private copy of the NREGS registers in REGS
+   and a copy of EPS_VIA_NODES.
+
+   Return REG_NOERROR on success or an error code on allocation failure.
+   On failure FS->num is left unchanged, so FS never counts an entry whose
+   fields were not fully set up; free_fail_stack_return frees the regs and
+   eps_via_nodes of every counted entry.  */
+
+static reg_errcode_t
+internal_function
+push_fail_stack (struct re_fail_stack_t *fs, int str_idx, int dest_node,
+		 int nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
+{
+  reg_errcode_t err;
+  int num = fs->num;
+  regmatch_t *saved_regs;
+
+  /* Grow the stack as soon as this push would fill it, so that there is
+     always a free slot at index NUM.  */
+  if (num + 1 >= fs->alloc)
+    {
+      struct re_fail_stack_ent_t *new_array;
+      new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
+				       * fs->alloc * 2));
+      if (new_array == NULL)
+	return REG_ESPACE;
+      fs->alloc *= 2;
+      fs->stack = new_array;
+    }
+
+  saved_regs = re_malloc (regmatch_t, nregs);
+  if (saved_regs == NULL)
+    return REG_ESPACE;
+  memcpy (saved_regs, regs, sizeof (regmatch_t) * nregs);
+
+  err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
+  if (BE (err != REG_NOERROR, 0))
+    {
+      re_free (saved_regs);
+      return err;
+    }
+
+  /* Everything is allocated: commit the entry.  */
+  fs->stack[num].idx = str_idx;
+  fs->stack[num].node = dest_node;
+  fs->stack[num].regs = saved_regs;
+  fs->num = num + 1;
+  return REG_NOERROR;
+}
+
+/* Pop the most recent backtracking point from the fail stack FS.
+   Restore its string index into *PIDX and its NREGS saved registers into
+   REGS, and replace *EPS_VIA_NODES (whose old contents are freed) with
+   the saved set.  Return the saved node.  */
+
+static int
+internal_function
+pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs,
+		regmatch_t *regs, re_node_set *eps_via_nodes)
+{
+  struct re_fail_stack_ent_t *top;
+
+  fs->num -= 1;
+  assert (fs->num >= 0);
+  top = &fs->stack[fs->num];
+
+  *pidx = top->idx;
+  memcpy (regs, top->regs, sizeof (regmatch_t) * nregs);
+  re_free (top->regs);
+
+  /* Ownership of the saved node set moves to the caller.  */
+  re_node_set_free (eps_via_nodes);
+  *eps_via_nodes = top->eps_via_nodes;
+
+  return top->node;
+}
+
+/* Set the positions where the subexpressions start and end into the
+   registers PMATCH, by walking the nodes from the initial node along the
+   path recorded in MCTX->state_log.
+
+   PREG supplies the compiled DFA, NMATCH is the number of entries in
+   PMATCH, and FL_BACKTRACK nonzero enables the fail stack so that
+   alternative epsilon transitions can be retried.
+
+   Note: We assume that pmatch[0] is already set, and
+   pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch.
+
+   Return REG_NOERROR on success, REG_ESPACE on allocation failure, or
+   REG_NOMATCH when no path is found and backtracking is disabled.  */
+
+static reg_errcode_t
+internal_function
+set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
+	  regmatch_t *pmatch, int fl_backtrack)
+{
+  const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
+  int idx, cur_node;
+  re_node_set eps_via_nodes;
+  struct re_fail_stack_t *fs;
+  /* NOTE(review): presumably initializes num = 0, alloc = 2, stack = NULL;
+     fs->alloc is used as the element count of the allocation below.  */
+  struct re_fail_stack_t fs_body = { 0, 2, NULL };
+  /* Snapshot of PMATCH that update_regs uses to undo empty matches of
+     optional subexpressions.  */
+  regmatch_t *prev_idx_match;
+  int prev_idx_match_malloced = 0;
+
+#ifdef DEBUG
+  assert (nmatch > 1);
+  assert (mctx->state_log != NULL);
+#endif
+  if (fl_backtrack)
+    {
+      fs = &fs_body;
+      fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
+      if (fs->stack == NULL)
+	return REG_ESPACE;
+    }
+  else
+    fs = NULL;
+
+  cur_node = dfa->init_node;
+  re_node_set_init_empty (&eps_via_nodes);
+
+  /* Use alloca for the snapshot when allowed, otherwise the heap; the
+     flag records which one so that only heap memory is freed.  */
+  if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
+    prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
+  else
+    {
+      prev_idx_match = re_malloc (regmatch_t, nmatch);
+      if (prev_idx_match == NULL)
+	{
+	  free_fail_stack_return (fs);
+	  return REG_ESPACE;
+	}
+      prev_idx_match_malloced = 1;
+    }
+  memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
+
+  /* IDX is advanced by proceed_next_node (or rewound by pop_fail_stack),
+     not by the loop header.  */
+  for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
+    {
+      update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
+
+      if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
+	{
+	  int reg_idx;
+	  if (fs)
+	    {
+	      /* With backtracking, accept only if no subexpression was
+		 opened without being closed; otherwise retry from the
+		 last saved alternative.  */
+	      for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
+		if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
+		  break;
+	      if (reg_idx == nmatch)
+		{
+		  re_node_set_free (&eps_via_nodes);
+		  if (prev_idx_match_malloced)
+		    re_free (prev_idx_match);
+		  return free_fail_stack_return (fs);
+		}
+	      /* NOTE(review): pop_fail_stack only asserts that the stack
+		 is not empty - confirm an empty stack cannot occur here.  */
+	      cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
+					 &eps_via_nodes);
+	    }
+	  else
+	    {
+	      re_node_set_free (&eps_via_nodes);
+	      if (prev_idx_match_malloced)
+		re_free (prev_idx_match);
+	      return REG_NOERROR;
+	    }
+	}
+
+      /* Proceed to next node.  */
+      cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
+				    &eps_via_nodes, fs);
+
+      if (BE (cur_node < 0, 0))
+	{
+	  /* -2 is an allocation failure, -1 a dead end.  */
+	  if (BE (cur_node == -2, 0))
+	    {
+	      re_node_set_free (&eps_via_nodes);
+	      if (prev_idx_match_malloced)
+		re_free (prev_idx_match);
+	      free_fail_stack_return (fs);
+	      return REG_ESPACE;
+	    }
+	  if (fs)
+	    cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
+				       &eps_via_nodes);
+	  else
+	    {
+	      re_node_set_free (&eps_via_nodes);
+	      if (prev_idx_match_malloced)
+		re_free (prev_idx_match);
+	      return REG_NOMATCH;
+	    }
+	}
+    }
+  re_node_set_free (&eps_via_nodes);
+  if (prev_idx_match_malloced)
+    re_free (prev_idx_match);
+  return free_fail_stack_return (fs);
+}
+
+/* Free every entry still on the fail stack FS and the stack array
+   itself.  FS may be NULL, in which case nothing is done.
+   Always return REG_NOERROR, so callers can write
+   "return free_fail_stack_return (fs);".  */
+
+static reg_errcode_t
+internal_function
+free_fail_stack_return (struct re_fail_stack_t *fs)
+{
+  int ent;
+
+  if (fs == NULL)
+    return REG_NOERROR;
+
+  ent = 0;
+  while (ent < fs->num)
+    {
+      re_node_set_free (&fs->stack[ent].eps_via_nodes);
+      re_free (fs->stack[ent].regs);
+      ++ent;
+    }
+  re_free (fs->stack);
+  return REG_NOERROR;
+}
+
+/* Update the registers PMATCH for passing through CUR_NODE at string
+   index CUR_IDX.  Only OP_OPEN_SUBEXP and OP_CLOSE_SUBEXP nodes whose
+   register number is below NMATCH have any effect.  PREV_IDX_MATCH is a
+   snapshot of PMATCH used to undo empty matches of optional
+   subexpressions.  */
+
+static void
+internal_function
+update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
+	     regmatch_t *prev_idx_match, int cur_node, int cur_idx, int nmatch)
+{
+  int type = dfa->nodes[cur_node].type;
+  int reg_num;
+
+  if (type != OP_OPEN_SUBEXP && type != OP_CLOSE_SUBEXP)
+    return;
+
+  reg_num = dfa->nodes[cur_node].opr.idx + 1;
+  if (reg_num >= nmatch)
+    return;
+
+  if (type == OP_OPEN_SUBEXP)
+    {
+      /* We are at the first node of this sub expression.  */
+      pmatch[reg_num].rm_so = cur_idx;
+      pmatch[reg_num].rm_eo = -1;
+      return;
+    }
+
+  /* We are at the last node of this sub expression.  */
+  if (pmatch[reg_num].rm_so < cur_idx)
+    {
+      /* This is a non-empty match or we are not inside an optional
+	 subexpression.  Accept this right away.  */
+      pmatch[reg_num].rm_eo = cur_idx;
+      memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
+    }
+  else if (dfa->nodes[cur_node].opt_subexp
+	   && prev_idx_match[reg_num].rm_so != -1)
+    {
+      /* We transited through an empty match for an optional
+	 subexpression, like (a?)*, and this is not the subexp's first
+	 match.  Copy back the old content of the registers so that
+	 matches of an inner subexpression are undone as well, like in
+	 ((a?))*.  */
+      memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
+    }
+  else
+    {
+      /* We completed a subexpression, but it may be part of an optional
+	 one, so do not update PREV_IDX_MATCH.  */
+      pmatch[reg_num].rm_eo = cur_idx;
+    }
+}
+
+/* This function checks the STATE_LOG from SCTX->last_str_idx down to 0
+   and sifts the nodes in each state according to the following rules.
+   The updated state_log will be written to STATE_LOG.
+
+   Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
+     1. When STR_IDX == MATCH_LAST(the last index in the state_log):
+	If `a' isn't the LAST_NODE and `a' can't epsilon transit to
+	the LAST_NODE, we throw away the node `a'.
+     2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
+	string `s' and transit to `b':
+	i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
+	   away the node `a'.
+	ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
+	    thrown away, we throw away the node `a'.
+     3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
+	i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
+	   node `a'.
+	ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
+	    we throw away the node `a'.  */
+
+/* Nonzero if STATE is not NULL and its node set contains NODE.
+   STATE is expanded twice, so it must not have side effects.  */
+#define STATE_NODE_CONTAINS(state,node) \
+  ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
+
+/* Sift the logged states backward, from SCTX->last_str_idx down to
+   index 0, storing the result for each index via
+   update_cur_sifted_state.  Return REG_NOERROR or the first error
+   reported by a helper.  */
+
+static reg_errcode_t
+internal_function
+sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
+{
+  reg_errcode_t err;
+  int empty_run = 0;
+  int str_idx = sctx->last_str_idx;
+  re_node_set cur_dest;
+
+#ifdef DEBUG
+  assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
+#endif
+
+  /* Build sifted state_log[str_idx].  It has the nodes which can epsilon
+     transit to the last_node and the last_node itself.  */
+  err = re_node_set_init_1 (&cur_dest, sctx->last_node);
+  if (BE (err != REG_NOERROR, 0))
+    return err;
+  err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
+
+  /* Then check each state in the state_log, stopping at the first
+     error.  */
+  while (err == REG_NOERROR && str_idx > 0)
+    {
+      /* Count consecutive indexes that have no sifted state.  */
+      if (sctx->sifted_states[str_idx] == NULL)
+	++empty_run;
+      else
+	empty_run = 0;
+
+      if (empty_run > mctx->max_mb_elem_len)
+	{
+	  /* The run of empty states is longer than max_mb_elem_len:
+	     clear the remaining entries and stop.  ERR is REG_NOERROR
+	     here.  */
+	  memset (sctx->sifted_states, '\0',
+		  sizeof (re_dfastate_t *) * str_idx);
+	  break;
+	}
+
+      re_node_set_empty (&cur_dest);
+      --str_idx;
+
+      if (mctx->state_log[str_idx])
+	{
+	  err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
+	  if (BE (err != REG_NOERROR, 0))
+	    break;
+	}
+
+      /* Add all the nodes which satisfy the following conditions:
+	 - It can epsilon transit to a node in CUR_DEST.
+	 - It is in CUR_SRC.
+	 And update state_log.  */
+      err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
+    }
+
+  re_node_set_free (&cur_dest);
+  return err;
+}
+
+/* Collect into CUR_DEST the non-epsilon nodes of state_log[STR_IDX] that
+   accept input at STR_IDX and lead to a node kept in a later sifted
+   state, subject to the limits in SCTX.
+   Return REG_NOERROR, or REG_ESPACE if a node cannot be inserted.  */
+
+static reg_errcode_t
+internal_function
+build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
+		     int str_idx, re_node_set *cur_dest)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
+  int src_pos;
+
+  /* Then build the next sifted state.
+     We build the next sifted state on `cur_dest', and update
+     `sifted_states[str_idx]' with `cur_dest'.
+     Note:
+     `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
+     `cur_src' points the node_set of the old `state_log[str_idx]'
+     (with the epsilon nodes pre-filtered out).  */
+  for (src_pos = 0; src_pos < cur_src->nelem; src_pos++)
+    {
+      int prev_node = cur_src->elems[src_pos];
+      int naccepted = 0;
+
+#ifdef DEBUG
+      re_token_type_t type = dfa->nodes[prev_node].type;
+      assert (!IS_EPSILON_NODE (type));
+#endif
+#ifdef RE_ENABLE_I18N
+      /* If the node may accept `multi byte'.  */
+      if (dfa->nodes[prev_node].accept_mb)
+	naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
+					 str_idx, sctx->last_str_idx);
+#endif /* RE_ENABLE_I18N */
+
+      /* We don't check backreferences here.
+	 See update_cur_sifted_state().  */
+      if (naccepted == 0)
+	{
+	  /* Single-position case: the node must accept the input here
+	     and its successor must be kept in the next sifted state.  */
+	  if (!check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
+	      || !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
+				       dfa->nexts[prev_node]))
+	    continue;
+	  naccepted = 1;
+	}
+
+      if (sctx->limits.nelem
+	  && check_dst_limits (mctx, &sctx->limits,
+			       dfa->nexts[prev_node], str_idx + naccepted,
+			       prev_node, str_idx))
+	continue;
+
+      if (BE (re_node_set_insert (cur_dest, prev_node) == -1, 0))
+	return REG_ESPACE;
+    }
+
+  return REG_NOERROR;
+}
+
+/* Helper functions.  */
+
+/* Prepare MCTX->state_log for use up to index NEXT_STATE_LOG_IDX:
+   extend the buffers if that index lies beyond them, then zero the
+   entries above the previous top and raise state_log_top.
+   Return REG_NOERROR, or the error reported by extend_buffers.  */
+
+static reg_errcode_t
+internal_function
+clean_state_log_if_needed (re_match_context_t *mctx, int next_state_log_idx)
+{
+  const int old_top = mctx->state_log_top;
+  const int beyond_buffer = next_state_log_idx >= mctx->input.bufs_len;
+  const int beyond_valid = (next_state_log_idx >= mctx->input.valid_len
+			    && mctx->input.valid_len < mctx->input.len);
+
+  if (beyond_buffer || beyond_valid)
+    {
+      reg_errcode_t err = extend_buffers (mctx);
+      if (BE (err != REG_NOERROR, 0))
+	return err;
+    }
+
+  if (next_state_log_idx > old_top)
+    {
+      /* Clear the newly exposed entries (old_top + 1 ..
+	 next_state_log_idx).  */
+      memset (&mctx->state_log[old_top + 1], '\0',
+	      sizeof (re_dfastate_t *) * (next_state_log_idx - old_top));
+      mctx->state_log_top = next_state_log_idx;
+    }
+  return REG_NOERROR;
+}
+
+/* Merge the first NUM states of SRC into DST, entry by entry.
+   Where only one side has a state, DST gets that state; where both do,
+   DST gets the state for the union of the two node sets.
+   Return REG_NOERROR, or the error from building a union or acquiring
+   the merged state.  */
+
+static reg_errcode_t
+internal_function
+merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
+		   re_dfastate_t **src, int num)
+{
+  int pos;
+
+  for (pos = 0; pos < num; ++pos)
+    {
+      reg_errcode_t err;
+      re_node_set merged_set;
+
+      /* Nothing to contribute from SRC at this index.  */
+      if (src[pos] == NULL)
+	continue;
+
+      if (dst[pos] == NULL)
+	{
+	  dst[pos] = src[pos];
+	  continue;
+	}
+
+      err = re_node_set_init_union (&merged_set, &dst[pos]->nodes,
+				    &src[pos]->nodes);
+      if (BE (err != REG_NOERROR, 0))
+	return err;
+      dst[pos] = re_acquire_state (&err, dfa, &merged_set);
+      re_node_set_free (&merged_set);
+      if (BE (err != REG_NOERROR, 0))
+	return err;
+    }
+  return REG_NOERROR;
+}
+
+/* Compute SCTX->sifted_states[STR_IDX] from DEST_NODES.
+   If DEST_NODES is empty the sifted state is NULL.  Otherwise, when a
+   state was logged at STR_IDX, DEST_NODES is first extended with the
+   logged nodes that can epsilon-transit into it and then filtered by the
+   limits in SCTX.  Finally, back references of the logged state are
+   sifted.  Return REG_NOERROR or the first error from a helper.  */
+
+static reg_errcode_t
+internal_function
+update_cur_sifted_state (const re_match_context_t *mctx,
+			 re_sift_context_t *sctx, int str_idx,
+			 re_node_set *dest_nodes)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  reg_errcode_t err = REG_NOERROR;
+  const re_node_set *candidates = NULL;
+
+  if (mctx->state_log[str_idx] != NULL)
+    candidates = &mctx->state_log[str_idx]->nodes;
+
+  if (dest_nodes->nelem == 0)
+    sctx->sifted_states[str_idx] = NULL;
+  else
+    {
+      if (candidates != NULL)
+	{
+	  /* At first, add the nodes which can epsilon transit to a node in
+	     DEST_NODE.  */
+	  err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
+	  if (BE (err != REG_NOERROR, 0))
+	    return err;
+
+	  /* Then, check the limitations in the current sift_context.  */
+	  if (sctx->limits.nelem != 0)
+	    {
+	      err = check_subexp_limits (dfa, dest_nodes, candidates,
+					 &sctx->limits, mctx->bkref_ents,
+					 str_idx);
+	      if (BE (err != REG_NOERROR, 0))
+		return err;
+	    }
+	}
+
+      sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
+      if (BE (err != REG_NOERROR, 0))
+	return err;
+    }
+
+  if (candidates == NULL || !mctx->state_log[str_idx]->has_backref)
+    return REG_NOERROR;
+
+  err = sift_states_bkref (mctx, sctx, str_idx, candidates);
+  if (BE (err != REG_NOERROR, 0))
+    return err;
+  return REG_NOERROR;
+}
+
+/* Add to DEST_NODES the nodes of CANDIDATES that belong to the inverse
+   epsilon closure of some node in DEST_NODES.
+
+   The union of the inverse epsilon closures is cached in the
+   `inveclosure' member of the state acquired for DEST_NODES and is built
+   on first use (a zero `alloc' marks it as not built yet).
+
+   Return REG_NOERROR on success, or an error code if acquiring the state
+   or building the cache fails.  If building fails the cache is reset to
+   empty, so that a later call rebuilds it instead of trusting a
+   half-built set.  */
+
+static reg_errcode_t
+internal_function
+add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
+		       const re_node_set *candidates)
+{
+  reg_errcode_t err = REG_NOERROR;
+  int i;
+
+  re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
+  if (BE (err != REG_NOERROR, 0))
+    return err;
+
+  if (!state->inveclosure.alloc)
+    {
+      err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
+      if (BE (err != REG_NOERROR, 0))
+	{
+	  /* Make sure the failed allocation is not mistaken for a valid
+	     (empty) cache next time.  */
+	  re_node_set_init_empty (&state->inveclosure);
+	  return REG_ESPACE;
+	}
+      for (i = 0; i < dest_nodes->nelem; i++)
+	{
+	  err = re_node_set_merge (&state->inveclosure,
+				   dfa->inveclosures + dest_nodes->elems[i]);
+	  if (BE (err != REG_NOERROR, 0))
+	    {
+	      /* Drop the partial union rather than caching it.  */
+	      re_node_set_free (&state->inveclosure);
+	      re_node_set_init_empty (&state->inveclosure);
+	      return REG_ESPACE;
+	    }
+	}
+    }
+  return re_node_set_add_intersect (dest_nodes, candidates,
+				    &state->inveclosure);
+}
+
+/* Remove from DEST_NODES the nodes in the inverse epsilon closure of
+   NODE, except those that must be kept because they can also reach,
+   through an epsilon node of that closure, a node of DEST_NODES that lies
+   outside the closure.  CANDIDATES restricts which nodes may be kept.
+   Return REG_NOERROR, or the error from building the exception set.  */
+
+static reg_errcode_t
+internal_function
+sub_epsilon_src_nodes (const re_dfa_t *dfa, int node, re_node_set *dest_nodes,
+		       const re_node_set *candidates)
+{
+    int ecl_idx;
+    reg_errcode_t err;
+    re_node_set *inv_eclosure = dfa->inveclosures + node;
+    /* Nodes of the closure that must survive the removal below.  */
+    re_node_set except_nodes;
+    re_node_set_init_empty (&except_nodes);
+    /* First pass: collect the exceptions.  */
+    for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
+      {
+	int cur_node = inv_eclosure->elems[ecl_idx];
+	if (cur_node == node)
+	  continue;
+	if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
+	  {
+	    /* First epsilon destination, and the second one or -1 if
+	       there is only one.  */
+	    int edst1 = dfa->edests[cur_node].elems[0];
+	    int edst2 = ((dfa->edests[cur_node].nelem > 1)
+			 ? dfa->edests[cur_node].elems[1] : -1);
+	    /* CUR_NODE has a destination that is in DEST_NODES but not in
+	       the closure being removed: keep what leads to CUR_NODE.  */
+	    if ((!re_node_set_contains (inv_eclosure, edst1)
+		 && re_node_set_contains (dest_nodes, edst1))
+		|| (edst2 > 0
+		    && !re_node_set_contains (inv_eclosure, edst2)
+		    && re_node_set_contains (dest_nodes, edst2)))
+	      {
+		err = re_node_set_add_intersect (&except_nodes, candidates,
+						 dfa->inveclosures + cur_node);
+		if (BE (err != REG_NOERROR, 0))
+		  {
+		    re_node_set_free (&except_nodes);
+		    return err;
+		  }
+	      }
+	  }
+      }
+    /* Second pass: remove every closure node that is not an exception.  */
+    for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
+      {
+	int cur_node = inv_eclosure->elems[ecl_idx];
+	if (!re_node_set_contains (&except_nodes, cur_node))
+	  {
+	    /* The position is the result of re_node_set_contains minus 1;
+	       presumably that is -1 when CUR_NODE is absent - confirm that
+	       re_node_set_remove_at ignores such an index.  */
+	    int idx = re_node_set_contains (dest_nodes, cur_node) - 1;
+	    re_node_set_remove_at (dest_nodes, idx);
+	  }
+      }
+    re_node_set_free (&except_nodes);
+    return REG_NOERROR;
+}
+
+/* Check the transition from SRC_NODE at SRC_IDX to DST_NODE at DST_IDX
+   against the back-reference limits in LIMITS.
+   Return 1 if, for some limit, source and destination are positioned
+   differently relative to the limiting subexpression; return 0 if they
+   agree for every limit.  */
+
+static int
+internal_function
+check_dst_limits (const re_match_context_t *mctx, re_node_set *limits,
+		  int dst_node, int dst_idx, int src_node, int src_idx)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  int dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
+  int src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
+  int pos;
+
+  for (pos = 0; pos < limits->nelem; ++pos)
+    {
+      int limit = limits->elems[pos];
+      int subexp_idx = dfa->nodes[mctx->bkref_ents[limit].node].opr.idx;
+      int dst_pos, src_pos;
+
+      dst_pos = check_dst_limits_calc_pos (mctx, limit, subexp_idx,
+					   dst_node, dst_idx, dst_bkref_idx);
+      src_pos = check_dst_limits_calc_pos (mctx, limit, subexp_idx,
+					   src_node, src_idx, src_bkref_idx);
+
+      /* Equal positions cover the cases:
+	 <src> <dst> ( <subexp> )
+	 ( <subexp> ) <src> <dst>
+	 ( <subexp1> <src> <subexp2> <dst> <subexp3> )
+	 and mean this limitation is unrelated.  */
+      if (src_pos != dst_pos)
+	return 1;
+    }
+  return 0;
+}
+
+/* Helper of check_dst_limits_calc_pos for a string index that lies on a
+   boundary of the limiting subexpression SUBEXP_IDX.
+   BOUNDARIES has bit 0 (value 1) set when the index equals the start of
+   the subexpression and bit 1 (value 2) set when it equals its end.
+   The epsilon closure of FROM_NODE is examined; BKREF_IDX is the first
+   back-reference cache entry for the string index, or -1.
+
+   Return -1 if a matching OP_OPEN_SUBEXP is found on the start boundary,
+   0 if a matching OP_CLOSE_SUBEXP is found on the end boundary, and
+   otherwise 1 when on the end boundary or 0 when not.  */
+
+static int
+internal_function
+check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
+			     int subexp_idx, int from_node, int bkref_idx)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  const re_node_set *eclosures = dfa->eclosures + from_node;
+  int node_idx;
+
+  /* Else, we are on the boundary: examine the nodes on the epsilon
+     closure.  */
+  for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
+    {
+      int node = eclosures->elems[node_idx];
+      switch (dfa->nodes[node].type)
+	{
+	case OP_BACK_REF:
+	  if (bkref_idx != -1)
+	    {
+	      /* Walk the run of cache entries starting at BKREF_IDX; the
+		 `more' flag of an entry tells whether another follows.
+		 Note that `continue' jumps to the loop condition, which
+		 still advances ENT.  */
+	      struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
+	      do
+	        {
+		  int dst, cpos;
+
+		  if (ent->node != node)
+		    continue;
+
+		  /* Skip entries already known not to reach SUBEXP_IDX
+		     (only tracked for indexes that fit in the bitmap).  */
+		  if (subexp_idx < BITSET_WORD_BITS
+		      && !(ent->eps_reachable_subexps_map
+			   & ((bitset_word_t) 1 << subexp_idx)))
+		    continue;
+
+		  /* Recurse trying to reach the OP_OPEN_SUBEXP and
+		     OP_CLOSE_SUBEXP cases below.  But, if the
+		     destination node is the same node as the source
+		     node, don't recurse because it would cause an
+		     infinite loop: a regex that exhibits this behavior
+		     is ()\1*\1*  */
+		  dst = dfa->edests[node].elems[0];
+		  if (dst == from_node)
+		    {
+		      if (boundaries & 1)
+		        return -1;
+		      else /* if (boundaries & 2) */
+		        return 0;
+		    }
+
+		  cpos =
+		    check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
+						 dst, bkref_idx);
+		  if (cpos == -1 /* && (boundaries & 1) */)
+		    return -1;
+		  if (cpos == 0 && (boundaries & 2))
+		    return 0;
+
+		  /* Nothing was found through this entry: clear its bit so
+		     the search is not repeated.  */
+		  if (subexp_idx < BITSET_WORD_BITS)
+		    ent->eps_reachable_subexps_map
+		      &= ~((bitset_word_t) 1 << subexp_idx);
+	        }
+	      while (ent++->more);
+	    }
+	  break;
+
+	case OP_OPEN_SUBEXP:
+	  if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
+	    return -1;
+	  break;
+
+	case OP_CLOSE_SUBEXP:
+	  if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
+	    return 0;
+	  break;
+
+	default:
+	    break;
+	}
+    }
+
+  return (boundaries & 2) ? 1 : 0;
+}
+
+/* Classify the position (FROM_NODE, STR_IDX) relative to the
+   subexpression range of back-reference cache entry LIMIT.
+   Return -1 if STR_IDX is before the range, 1 if it is after it, 0 if it
+   is strictly inside; on a boundary the epsilon closure of FROM_NODE is
+   examined by check_dst_limits_calc_pos_1 and its result is returned.  */
+
+static int
+internal_function
+check_dst_limits_calc_pos (const re_match_context_t *mctx, int limit,
+			   int subexp_idx, int from_node, int str_idx,
+			   int bkref_idx)
+{
+  struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
+  int at_start, at_end;
+
+  /* If we are outside the range of the subexpression, return -1 or 1.  */
+  if (str_idx < lim->subexp_from)
+    return -1;
+  if (lim->subexp_to < str_idx)
+    return 1;
+
+  at_start = (str_idx == lim->subexp_from);
+  at_end = (str_idx == lim->subexp_to);
+
+  /* If we are within the subexpression, return 0.  */
+  if (!at_start && !at_end)
+    return 0;
+
+  /* Else, examine epsilon closure.  Bit 0 flags the start boundary,
+     bit 1 the end boundary.  */
+  return check_dst_limits_calc_pos_1 (mctx, at_start | (at_end << 1),
+				      subexp_idx, from_node, bkref_idx);
+}
+
+/* Check the limitations of sub expressions LIMITS, and remove the nodes
+   which are against limitations from DEST_NODES.
+
+   LIMITS holds indexes into BKREF_ENTS; CANDIDATES is passed through to
+   sub_epsilon_src_nodes, which performs the removals; STR_IDX is the
+   string index DEST_NODES belongs to.
+   Return REG_NOERROR, or the error from sub_epsilon_src_nodes.  */
+
+static reg_errcode_t
+internal_function
+check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
+		     const re_node_set *candidates, re_node_set *limits,
+		     struct re_backref_cache_entry *bkref_ents, int str_idx)
+{
+  reg_errcode_t err;
+  int node_idx, lim_idx;
+
+  for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
+    {
+      int subexp_idx;
+      struct re_backref_cache_entry *ent;
+      ent = bkref_ents + limits->elems[lim_idx];
+
+      /* Only indexes after the subexpression start and up to the back
+	 reference's own index are affected.  */
+      if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
+	continue; /* This is unrelated limitation.  */
+
+      subexp_idx = dfa->nodes[ent->node].opr.idx;
+      if (ent->subexp_to == str_idx)
+	{
+	  /* Find the open/close nodes of this subexpression among
+	     DEST_NODES; -1 means not present.  */
+	  int ops_node = -1;
+	  int cls_node = -1;
+	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
+	    {
+	      int node = dest_nodes->elems[node_idx];
+	      re_token_type_t type = dfa->nodes[node].type;
+	      if (type == OP_OPEN_SUBEXP
+		  && subexp_idx == dfa->nodes[node].opr.idx)
+		ops_node = node;
+	      else if (type == OP_CLOSE_SUBEXP
+		       && subexp_idx == dfa->nodes[node].opr.idx)
+		cls_node = node;
+	    }
+
+	  /* Check the limitation of the open subexpression.  */
+	  /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */
+	  if (ops_node >= 0)
+	    {
+	      err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
+					   candidates);
+	      if (BE (err != REG_NOERROR, 0))
+		return err;
+	    }
+
+	  /* Check the limitation of the close subexpression.  */
+	  if (cls_node >= 0)
+	    for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
+	      {
+		int node = dest_nodes->elems[node_idx];
+		if (!re_node_set_contains (dfa->inveclosures + node,
+					   cls_node)
+		    && !re_node_set_contains (dfa->eclosures + node,
+					      cls_node))
+		  {
+		    /* It is against this limitation.
+		       Remove it from the current sifted state.  */
+		    err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
+						 candidates);
+		    if (BE (err != REG_NOERROR, 0))
+		      return err;
+		    /* DEST_NODES may have shrunk: look at this position
+		       again on the next iteration.  */
+		    --node_idx;
+		  }
+	      }
+	}
+      else /* (ent->subexp_to != str_idx)  */
+	{
+	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
+	    {
+	      int node = dest_nodes->elems[node_idx];
+	      re_token_type_t type = dfa->nodes[node].type;
+	      if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
+		{
+		  if (subexp_idx != dfa->nodes[node].opr.idx)
+		    continue;
+		  /* It is against this limitation.
+		     Remove it from the current sifted state.  */
+		  err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
+					       candidates);
+		  if (BE (err != REG_NOERROR, 0))
+		    return err;
+		}
+	    }
+	}
+    }
+  return REG_NOERROR;
+}
+
+/* Sift the states again for each back reference in CANDIDATES that has a
+   cache entry at string index STR_IDX.
+
+   For every usable cache entry a local copy of SCTX is sifted backward
+   with that entry added to its limits, and, if SCTX->limited_states is
+   not NULL, the result is merged into it.
+   Return REG_NOERROR, or the first error from a helper.  */
+
+static reg_errcode_t
+internal_function
+sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
+		   int str_idx, const re_node_set *candidates)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  reg_errcode_t err;
+  int node_idx, node;
+  re_sift_context_t local_sctx;
+  int first_idx = search_cur_bkref_entry (mctx, str_idx);
+
+  /* No back-reference cache entry for this index: nothing to do.  */
+  if (first_idx == -1)
+    return REG_NOERROR;
+
+  local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.  */
+
+  for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
+    {
+      int enabled_idx;
+      re_token_type_t type;
+      struct re_backref_cache_entry *entry;
+      node = candidates->elems[node_idx];
+      type = dfa->nodes[node].type;
+      /* Avoid infinite loop for the REs like "()\1+".  */
+      if (node == sctx->last_node && str_idx == sctx->last_str_idx)
+	continue;
+      if (type != OP_BACK_REF)
+	continue;
+
+      /* Walk the run of cache entries starting at FIRST_IDX; ENABLED_IDX
+	 tracks the index of ENTRY within mctx->bkref_ents.  A `continue'
+	 jumps to the loop condition, which advances both.  */
+      entry = mctx->bkref_ents + first_idx;
+      enabled_idx = first_idx;
+      do
+	{
+	  int subexp_len;
+	  int to_idx;
+	  int dst_node;
+	  int ret;
+	  re_dfastate_t *cur_state;
+
+	  if (entry->node != node)
+	    continue;
+	  subexp_len = entry->subexp_to - entry->subexp_from;
+	  to_idx = str_idx + subexp_len;
+	  /* An empty back reference goes to its first epsilon
+	     destination instead of its `nexts' node.  */
+	  dst_node = (subexp_len ? dfa->nexts[node]
+		      : dfa->edests[node].elems[0]);
+
+	  /* Skip entries whose destination was not kept by the sifting
+	     or that violate the current limits.  */
+	  if (to_idx > sctx->last_str_idx
+	      || sctx->sifted_states[to_idx] == NULL
+	      || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
+	      || check_dst_limits (mctx, &sctx->limits, node,
+				   str_idx, dst_node, to_idx))
+	    continue;
+
+	  /* Lazily set up the local context: a shallow copy of SCTX with
+	     its own copy of the limits set.  */
+	  if (local_sctx.sifted_states == NULL)
+	    {
+	      local_sctx = *sctx;
+	      err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
+	      if (BE (err != REG_NOERROR, 0))
+		goto free_return;
+	    }
+	  local_sctx.last_node = node;
+	  local_sctx.last_str_idx = str_idx;
+	  ret = re_node_set_insert (&local_sctx.limits, enabled_idx);
+	  if (BE (ret < 0, 0))
+	    {
+	      err = REG_ESPACE;
+	      goto free_return;
+	    }
+	  /* Save the sifted state at STR_IDX; it is put back after the
+	     nested sift below.  */
+	  cur_state = local_sctx.sifted_states[str_idx];
+	  err = sift_states_backward (mctx, &local_sctx);
+	  if (BE (err != REG_NOERROR, 0))
+	    goto free_return;
+	  if (sctx->limited_states != NULL)
+	    {
+	      err = merge_state_array (dfa, sctx->limited_states,
+				       local_sctx.sifted_states,
+				       str_idx + 1);
+	      if (BE (err != REG_NOERROR, 0))
+		goto free_return;
+	    }
+	  local_sctx.sifted_states[str_idx] = cur_state;
+	  re_node_set_remove (&local_sctx.limits, enabled_idx);
+
+	  /* mctx->bkref_ents may have changed, reload the pointer.  */
+          entry = mctx->bkref_ents + enabled_idx;
+	}
+      while (enabled_idx++, entry++->more);
+    }
+  err = REG_NOERROR;
+ free_return:
+  /* The limits copy exists only if the local context was set up.  */
+  if (local_sctx.sifted_states != NULL)
+    {
+      re_node_set_free (&local_sctx.limits);
+    }
+
+  return err;
+}
+
+
+#ifdef RE_ENABLE_I18N
+/* Ask check_node_accept_bytes how many bytes the node NODE_IDX accepts at
+   STR_IDX.  A positive count that ends at or before MAX_STR_IDX is turned
+   into 0 when the sifted state at the end position does not contain the
+   node's successor; every other count is returned unchanged.  */
+static int
+internal_function
+sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
+                     int node_idx, int str_idx, int max_str_idx)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  const int nbytes = check_node_accept_bytes (dfa, node_idx, &mctx->input,
+                                              str_idx);
+  int end_idx;
+
+  /* Nothing was accepted: hand the count back as it is.  */
+  if (nbytes <= 0)
+    return nbytes;
+
+  /* The end position lies beyond MAX_STR_IDX: no destination check is
+     made for it.  */
+  end_idx = str_idx + nbytes;
+  if (end_idx > max_str_idx)
+    return nbytes;
+
+  /* The destination is still alive, so NBYTES bytes are accepted.  */
+  if (STATE_NODE_CONTAINS (sctx->sifted_states[end_idx],
+                           dfa->nexts[node_idx]))
+    return nbytes;
+
+  /* The destination was already thrown away.  */
+  return 0;
+}
+#endif /* RE_ENABLE_I18N */
+
+
+/* Functions for state transition.  */
+
+/* Return the state reached from STATE by consuming the current input byte.
+   When RE_ENABLE_I18N is defined and STATE->accept_mb is set,
+   transit_state_mb is run first.
+   The byte is then looked up in STATE->trtable or, failing that, in
+   STATE->word_trtable, whose entries at offset SBC_MAX are used when the
+   context before the current index is a word context.  If neither table
+   exists yet, build_trtable is called and the lookup is retried.
+   On failure *ERR is set (REG_ESPACE when the table cannot be built) and
+   NULL is returned.  */
+
+static re_dfastate_t *
+internal_function
+transit_state (reg_errcode_t *err, re_match_context_t *mctx,
+               re_dfastate_t *state)
+{
+  unsigned char byte;
+
+#ifdef RE_ENABLE_I18N
+  /* Let the multibyte-capable nodes of STATE go first.  */
+  if (BE (state->accept_mb, 0))
+    {
+      *err = transit_state_mb (mctx, state);
+      if (BE (*err != REG_NOERROR, 0))
+        return NULL;
+    }
+#endif /* RE_ENABLE_I18N */
+
+  /* Fetch the byte once; the loop below only repeats the table lookup.  */
+  byte = re_string_fetch_byte (&mctx->input);
+  while (1)
+    {
+      /* Plain table: indexed by the byte alone.  */
+      if (BE (state->trtable != NULL, 1))
+        return state->trtable[byte];
+
+      /* Word-sensitive table: the second half serves word contexts.  */
+      if (BE (state->word_trtable != NULL, 1))
+        {
+          unsigned int prev_ctx
+            = re_string_context_at (&mctx->input,
+                                    re_string_cur_idx (&mctx->input) - 1,
+                                    mctx->eflags);
+          if (IS_WORD_CONTEXT (prev_ctx))
+            return state->word_trtable[byte + SBC_MAX];
+          return state->word_trtable[byte];
+        }
+
+      /* No table yet: build one, then go round again.  */
+      if (!build_trtable (mctx->dfa, state))
+        {
+          *err = REG_ESPACE;
+          return NULL;
+        }
+    }
+}
+
+/* Record NEXT_STATE in MCTX->state_log at the current input index and
+   return the state that ends up there.
+   If the slot is beyond state_log_top or still empty, NEXT_STATE is simply
+   stored.  Otherwise the slot already holds a state, and the stored state
+   becomes the one acquired for the union of both entrance node sets (or
+   for the logged set alone when NEXT_STATE is NULL).
+   When the DFA has back references and the resulting state is not NULL,
+   check_subexp_matching_top is run on it and, if it has back references,
+   transit_state_bkref as well; the slot is then re-read.
+   On error *ERR is set and NULL is returned.  */
+re_dfastate_t *
+internal_function
+merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
+                      re_dfastate_t *next_state)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  const int pos = re_string_cur_idx (&mctx->input);
+
+  if (pos > mctx->state_log_top)
+    {
+      /* First entry this far into the log.  */
+      mctx->state_log_top = pos;
+      mctx->state_log[pos] = next_state;
+    }
+  else if (mctx->state_log[pos] == NULL)
+    mctx->state_log[pos] = next_state;
+  else
+    {
+      /* The slot is already occupied, i.e. POS is the destination of a
+         multibyte char/collating element/back reference.  The new state
+         is the union of those destinations and the table result.  */
+      re_node_set merged;
+      re_node_set *logged = mctx->state_log[pos]->entrance_nodes;
+      re_node_set *from_table = NULL;
+      unsigned int ctx;
+
+      if (next_state == NULL)
+        /* Borrow the logged set; it must not be freed below.  */
+        merged = *logged;
+      else
+        {
+          from_table = next_state->entrance_nodes;
+          *err = re_node_set_init_union (&merged, from_table, logged);
+          if (BE (*err != REG_NOERROR, 0))
+            return NULL;
+        }
+      /* Note: the nodes of the initial state were added already, so
+         they need not be added here.  */
+
+      ctx = re_string_context_at (&mctx->input, pos - 1, mctx->eflags);
+      /* No error check is needed: the result is returned as NEXT_STATE
+         and ERR has been set by the callee.  */
+      next_state = re_acquire_state_context (err, dfa, &merged, ctx);
+      mctx->state_log[pos] = next_state;
+
+      /* Only a set built by the union above is owned here.  */
+      if (from_table != NULL)
+        re_node_set_free (&merged);
+    }
+
+  if (!BE (dfa->nbackref, 0) || next_state == NULL)
+    return next_state;
+
+  /* Register the OP_OPEN_SUBEXP nodes of this state now, since the back
+     references in the next state might use them.  */
+  *err = check_subexp_matching_top (mctx, &next_state->nodes, pos);
+  if (BE (*err != REG_NOERROR, 0))
+    return NULL;
+
+  if (!next_state->has_backref)
+    return next_state;
+
+  /* The state has back references: expand them, then pick up whatever
+     is stored in the log afterwards.  */
+  *err = transit_state_bkref (mctx, &next_state->nodes);
+  if (BE (*err != REG_NOERROR, 0))
+    return NULL;
+  return mctx->state_log[pos];
+}
+
+/* Advance the input one byte at a time until an index with a logged state
+   is reached, then merge that log entry via merge_state_with_log.  Repeat
+   while the merge yields NULL without an error.
+   Return the recovered state, or NULL when the index would pass
+   MCTX->state_log_top or when *ERR was set by the merge.  */
+re_dfastate_t *
+internal_function
+find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
+{
+  for (;;)
+    {
+      const int top = mctx->state_log_top;
+      int idx = re_string_cur_idx (&mctx->input) + 1;
+      re_dfastate_t *state;
+
+      /* Step forward to the next index that has a logged state.  */
+      for (;; ++idx)
+        {
+          if (idx > top)
+            return NULL;
+          re_string_skip_bytes (&mctx->input, 1);
+          if (mctx->state_log[idx] != NULL)
+            break;
+        }
+
+      state = merge_state_with_log (err, mctx, NULL);
+      if (state != NULL || *err != REG_NOERROR)
+        return state;
+    }
+}
+
+/* Helper functions for transit_state.  */
+
+/* Scan CUR_NODES for OP_OPEN_SUBEXP nodes whose sub-expression index is
+   below BITSET_WORD_BITS and is marked in DFA->used_bkref_map, and register
+   each of them at STR_IDX with match_ctx_add_subtop so that the
+   corresponding back references can be evaluated later.
+   Return REG_NOERROR, or the first error reported by the registration.  */
+
+static reg_errcode_t
+internal_function
+check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
+                           int str_idx)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  int i;
+
+  /* TODO: This isn't efficient.
+     Because more than one node may be an OP_OPEN_SUBEXP with the same
+     sub-expression index, all nodes must be checked.
+     E.g. RE: (a){2}  */
+  for (i = 0; i < cur_nodes->nelem; ++i)
+    {
+      const int node = cur_nodes->elems[i];
+      reg_errcode_t err;
+
+      if (dfa->nodes[node].type != OP_OPEN_SUBEXP)
+        continue;
+      if (!(dfa->nodes[node].opr.idx < BITSET_WORD_BITS))
+        continue;
+      if (!(dfa->used_bkref_map
+            & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
+        continue;
+
+      err = match_ctx_add_subtop (mctx, node, str_idx);
+      if (BE (err != REG_NOERROR, 0))
+        return err;
+    }
+  return REG_NOERROR;
+}
+
+#if 0
+/* Compiled out.  Table-free variant of the single-byte transition:
+   collect the epsilon closures of the successors of every node of STATE
+   that accepts the current input byte, acquire the state for that node
+   set, and advance the input by one byte.
+   On allocation failure *ERR is set and NULL is returned.  */
+
+static re_dfastate_t *
+transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
+                  re_dfastate_t *state)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  const int pos = re_string_cur_idx (&mctx->input);
+  re_node_set dests;
+  re_dfastate_t *result;
+  unsigned int ctx;
+  int i;
+
+  *err = re_node_set_alloc (&dests, state->nodes.nelem + 1);
+  if (BE (*err != REG_NOERROR, 0))
+    return NULL;
+
+  for (i = 0; i < state->nodes.nelem; ++i)
+    {
+      const int node = state->nodes.elems[i];
+
+      if (!check_node_accept (mctx, dfa->nodes + node, pos))
+        continue;
+      *err = re_node_set_merge (&dests, dfa->eclosures + dfa->nexts[node]);
+      if (BE (*err != REG_NOERROR, 0))
+        {
+          re_node_set_free (&dests);
+          return NULL;
+        }
+    }
+
+  ctx = re_string_context_at (&mctx->input, pos, mctx->eflags);
+  /* No error check is needed: RESULT is the return value and ERR has
+     been set by the callee.  */
+  result = re_acquire_state_context (err, dfa, &dests, ctx);
+
+  re_node_set_free (&dests);
+  re_string_skip_bytes (&mctx->input, 1);
+  return result;
+}
+#endif
+
+#ifdef RE_ENABLE_I18N
+/* For every node of PSTATE that is flagged accept_mb, satisfies its
+   constraint at the current index and accepts a non-zero number of bytes
+   there, add the epsilon closure of the node's successor to the state
+   logged at the index where the accepted bytes end.  MCTX->max_mb_elem_len
+   is raised to the largest accepted length seen.
+   Return REG_NOERROR, or the error reported by a helper.  */
+static reg_errcode_t
+internal_function
+transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  int n;
+
+  for (n = 0; n < pstate->nodes.nelem; ++n)
+    {
+      const int node = pstate->nodes.elems[n];
+      re_node_set merged, *closure;
+      re_dfastate_t *logged;
+      reg_errcode_t err;
+      unsigned int ctx;
+      int nbytes, end_idx;
+
+      if (!dfa->nodes[node].accept_mb)
+        continue;
+
+      if (dfa->nodes[node].constraint)
+        {
+          ctx = re_string_context_at (&mctx->input,
+                                      re_string_cur_idx (&mctx->input),
+                                      mctx->eflags);
+          if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[node].constraint, ctx))
+            continue;
+        }
+
+      /* How many bytes can the node accept here?  */
+      nbytes = check_node_accept_bytes (dfa, node, &mctx->input,
+                                        re_string_cur_idx (&mctx->input));
+      if (nbytes == 0)
+        continue;
+
+      /* The node accepts NBYTES bytes, ending at END_IDX.  */
+      end_idx = re_string_cur_idx (&mctx->input) + nbytes;
+      if (mctx->max_mb_elem_len < nbytes)
+        mctx->max_mb_elem_len = nbytes;
+      err = clean_state_log_if_needed (mctx, end_idx);
+      if (BE (err != REG_NOERROR, 0))
+        return err;
+#ifdef DEBUG
+      assert (dfa->nexts[node] != -1);
+#endif
+      closure = dfa->eclosures + dfa->nexts[node];
+
+      /* Read the log slot only after the log has been cleaned above.  */
+      logged = mctx->state_log[end_idx];
+      if (logged != NULL)
+        {
+          err = re_node_set_init_union (&merged, logged->entrance_nodes,
+                                        closure);
+          if (BE (err != REG_NOERROR, 0))
+            return err;
+        }
+      else
+        /* Borrow the closure itself; it must not be freed below.  */
+        merged = *closure;
+
+      ctx = re_string_context_at (&mctx->input, end_idx - 1, mctx->eflags);
+      mctx->state_log[end_idx]
+        = re_acquire_state_context (&err, dfa, &merged, ctx);
+      /* Only a set built by the union above is owned here.  */
+      if (logged != NULL)
+        re_node_set_free (&merged);
+      if (BE (mctx->state_log[end_idx] == NULL && err != REG_NOERROR, 0))
+        return err;
+    }
+  return REG_NOERROR;
+}
+#endif /* RE_ENABLE_I18N */
+
+/* For every OP_BACK_REF node in NODES that satisfies its constraint at the
+   current input index, call get_subexp for it and then, for each matching
+   back-reference cache entry from the previous end of the cache onwards,
+   record the back reference's destination node set in MCTX->state_log at
+   the index where the referenced text would end.  A back reference that
+   matched the empty string and enlarged the state at the current index is
+   followed recursively.
+   Return REG_NOERROR, or the error reported by a helper.  */
+static reg_errcode_t
+internal_function
+transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  reg_errcode_t err;
+  int i;
+  int cur_str_idx = re_string_cur_idx (&mctx->input);
+
+  for (i = 0; i < nodes->nelem; ++i)
+    {
+      int dest_str_idx, prev_nelem, bkc_idx;
+      int node_idx = nodes->elems[i];
+      unsigned int context;
+      const re_token_t *node = dfa->nodes + node_idx;
+      re_node_set *new_dest_nodes;
+
+      /* Check whether `node' is a backreference or not.  */
+      if (node->type != OP_BACK_REF)
+	continue;
+
+      if (node->constraint)
+	{
+	  context = re_string_context_at (&mctx->input, cur_str_idx,
+					  mctx->eflags);
+	  if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
+	    continue;
+	}
+
+      /* `node' is a backreference.
+	 Check the substring which the substring matched.  */
+      /* Remember where the cache ends now; the loop below only scans the
+	 entries from this index on.  */
+      bkc_idx = mctx->nbkref_ents;
+      err = get_subexp (mctx, node_idx, cur_str_idx);
+      if (BE (err != REG_NOERROR, 0))
+	goto free_return;
+
+      /* And add the epsilon closures (which is `new_dest_nodes') of
+	 the backreference to appropriate state_log.  */
+#ifdef DEBUG
+      assert (dfa->nexts[node_idx] != -1);
+#endif
+      for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
+	{
+	  int subexp_len;
+	  re_dfastate_t *dest_state;
+	  struct re_backref_cache_entry *bkref_ent;
+	  /* The entry pointer is recomputed from the array base on every
+	     iteration rather than carried across the recursive call.  */
+	  bkref_ent = mctx->bkref_ents + bkc_idx;
+	  if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
+	    continue;
+	  subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
+	  /* An empty match continues through the node's first epsilon
+	     destination; a non-empty one through its regular successor.  */
+	  new_dest_nodes = (subexp_len == 0
+			    ? dfa->eclosures + dfa->edests[node_idx].elems[0]
+			    : dfa->eclosures + dfa->nexts[node_idx]);
+	  dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
+			  - bkref_ent->subexp_from);
+	  context = re_string_context_at (&mctx->input, dest_str_idx - 1,
+					  mctx->eflags);
+	  dest_state = mctx->state_log[dest_str_idx];
+	  /* Node count of the state at CUR_STR_IDX before this update; used
+	     below to detect whether the state grew.  */
+	  prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
+			: mctx->state_log[cur_str_idx]->nodes.nelem);
+	  /* Add `new_dest_node' to state_log.  */
+	  if (dest_state == NULL)
+	    {
+	      mctx->state_log[dest_str_idx]
+		= re_acquire_state_context (&err, dfa, new_dest_nodes,
+					    context);
+	      if (BE (mctx->state_log[dest_str_idx] == NULL
+		      && err != REG_NOERROR, 0))
+		goto free_return;
+	    }
+	  else
+	    {
+	      /* A state is already logged there: log the state for the
+		 union of its entrance nodes and the new destinations.  */
+	      re_node_set dest_nodes;
+	      err = re_node_set_init_union (&dest_nodes,
+					    dest_state->entrance_nodes,
+					    new_dest_nodes);
+	      if (BE (err != REG_NOERROR, 0))
+		{
+		  re_node_set_free (&dest_nodes);
+		  goto free_return;
+		}
+	      mctx->state_log[dest_str_idx]
+		= re_acquire_state_context (&err, dfa, &dest_nodes, context);
+	      re_node_set_free (&dest_nodes);
+	      if (BE (mctx->state_log[dest_str_idx] == NULL
+		      && err != REG_NOERROR, 0))
+		goto free_return;
+	    }
+	  /* We need to check recursively if the backreference can epsilon
+	     transit.  */
+	  /* Recurse only when the state at CUR_STR_IDX actually gained
+	     nodes compared with PREV_NELEM.  */
+	  if (subexp_len == 0
+	      && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
+	    {
+	      err = check_subexp_matching_top (mctx, new_dest_nodes,
+					       cur_str_idx);
+	      if (BE (err != REG_NOERROR, 0))
+		goto free_return;
+	      err = transit_state_bkref (mctx, new_dest_nodes);
+	      if (BE (err != REG_NOERROR, 0))
+		goto free_return;
+	    }
+	}
+    }
+  err = REG_NOERROR;
+ free_return:
+  return err;
+}
+
+/* Enumerate all the candidates which the back reference BKREF_NODE can
+   match at BKREF_STR_IDX, and register them (through get_subexp_sub).
+   Note that we might collect inappropriate candidates here.
+   However, the cost of checking them strictly here is too high, so that
+   checking is delayed until prune_impossible_nodes().
+
+   Nothing is done when the cache already holds an entry for BKREF_NODE at
+   BKREF_STR_IDX.  Otherwise, for every registered sub-expression top with
+   the same sub-expression index, the already known ends of the
+   sub-expression are tried first, then the remaining positions up to
+   BKREF_STR_IDX are searched for further ends.
+
+   Return REG_NOERROR on success, REG_ESPACE when memory runs out, or any
+   other error reported by a helper.  REG_NOMATCH from a single candidate
+   only skips that candidate.  */
+
+static reg_errcode_t
+internal_function
+get_subexp (re_match_context_t *mctx, int bkref_node, int bkref_str_idx)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  int subexp_num, sub_top_idx;
+  const char *buf = (const char *) re_string_get_buffer (&mctx->input);
+  /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX.  */
+  int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
+  if (cache_idx != -1)
+    {
+      const struct re_backref_cache_entry *entry
+        = mctx->bkref_ents + cache_idx;
+      do
+        if (entry->node == bkref_node)
+          return REG_NOERROR; /* We already checked it.  */
+      while (entry++->more);
+    }
+
+  subexp_num = dfa->nodes[bkref_node].opr.idx;
+
+  /* For each sub expression  */
+  for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
+    {
+      reg_errcode_t err;
+      re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
+      re_sub_match_last_t *sub_last;
+      int sub_last_idx, sl_str, bkref_str_off;
+
+      if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
+        continue; /* It isn't related.  */
+
+      sl_str = sub_top->str_idx;
+      bkref_str_off = bkref_str_idx;
+      /* At first, check the last node of sub expressions we already
+         evaluated.  */
+      for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
+        {
+          int sl_str_diff;
+          sub_last = sub_top->lasts[sub_last_idx];
+          sl_str_diff = sub_last->str_idx - sl_str;
+          /* Does the string matched by the sub expression match the
+             substring at the back reference?  */
+          if (sl_str_diff > 0)
+            {
+              if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
+                {
+                  /* Not enough chars for a successful match.  */
+                  if (bkref_str_off + sl_str_diff > mctx->input.len)
+                    break;
+
+                  err = clean_state_log_if_needed (mctx,
+                                                   bkref_str_off
+                                                   + sl_str_diff);
+                  if (BE (err != REG_NOERROR, 0))
+                    return err;
+                  buf = (const char *) re_string_get_buffer (&mctx->input);
+                }
+              if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
+                /* We don't need to search this sub expression any more.  */
+                break;
+            }
+          bkref_str_off += sl_str_diff;
+          sl_str += sl_str_diff;
+          err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
+                                bkref_str_idx);
+
+          /* Reload buf, since the preceding call might have reallocated
+             the buffer.  */
+          buf = (const char *) re_string_get_buffer (&mctx->input);
+
+          if (err == REG_NOMATCH)
+            continue;
+          if (BE (err != REG_NOERROR, 0))
+            return err;
+        }
+
+      /* The loop above stopped early: this sub expression is done.  */
+      if (sub_last_idx < sub_top->nlasts)
+        continue;
+      if (sub_last_idx > 0)
+        ++sl_str;
+      /* Then, search for the other last nodes of the sub expression.  */
+      for (; sl_str <= bkref_str_idx; ++sl_str)
+        {
+          int cls_node, sl_str_off;
+          const re_node_set *nodes;
+          sl_str_off = sl_str - sub_top->str_idx;
+          /* Does the string matched by the sub expression match the
+             substring at the back reference?  */
+          if (sl_str_off > 0)
+            {
+              if (BE (bkref_str_off >= mctx->input.valid_len, 0))
+                {
+                  /* If we are at the end of the input, we cannot match.  */
+                  if (bkref_str_off >= mctx->input.len)
+                    break;
+
+                  err = extend_buffers (mctx);
+                  if (BE (err != REG_NOERROR, 0))
+                    return err;
+
+                  buf = (const char *) re_string_get_buffer (&mctx->input);
+                }
+              if (buf [bkref_str_off++] != buf[sl_str - 1])
+                break; /* We don't need to search this sub expression
+                          any more.  */
+            }
+          if (mctx->state_log[sl_str] == NULL)
+            continue;
+          /* Does this state have a ')' of the sub expression?  */
+          nodes = &mctx->state_log[sl_str]->nodes;
+          cls_node = find_subexp_node (dfa, nodes, subexp_num,
+                                       OP_CLOSE_SUBEXP);
+          if (cls_node == -1)
+            continue; /* No.  */
+          if (sub_top->path == NULL)
+            {
+              /* calloc takes the element count first, then the size.  */
+              sub_top->path = calloc (sl_str - sub_top->str_idx + 1,
+                                      sizeof (state_array_t));
+              if (sub_top->path == NULL)
+                return REG_ESPACE;
+            }
+          /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
+             in the current context?  */
+          err = check_arrival (mctx, sub_top->path, sub_top->node,
+                               sub_top->str_idx, cls_node, sl_str,
+                               OP_CLOSE_SUBEXP);
+          if (err == REG_NOMATCH)
+            continue;
+          if (BE (err != REG_NOERROR, 0))
+            return err;
+          sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
+          if (BE (sub_last == NULL, 0))
+            return REG_ESPACE;
+          err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
+                                bkref_str_idx);
+
+          /* Reload buf here as well: the call may have reallocated the
+             input buffer, and the next iteration reads through BUF.  */
+          buf = (const char *) re_string_get_buffer (&mctx->input);
+
+          if (err == REG_NOMATCH)
+            continue;
+          /* Any other failure (e.g. REG_ESPACE) must reach the caller
+             instead of being silently dropped.  */
+          if (BE (err != REG_NOERROR, 0))
+            return err;
+        }
+    }
+  return REG_NOERROR;
+}
+
+/* Helper functions for get_subexp().  */
+
+/* Check whether SUB_LAST can arrive at the back reference BKREF_NODE at
+   BKREF_STR.  If it can, add a cache entry for the sub expression spanned
+   by SUB_TOP and SUB_LAST, then make sure the state log covers the index
+   where the back reference would end.
+   Return the first non-REG_NOERROR result of check_arrival or
+   match_ctx_add_entry, otherwise the result of
+   clean_state_log_if_needed.  */
+
+static reg_errcode_t
+internal_function
+get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
+                re_sub_match_last_t *sub_last, int bkref_node, int bkref_str)
+{
+  reg_errcode_t status;
+
+  /* Can the subexpression arrive the back reference?  */
+  status = check_arrival (mctx, &sub_last->path, sub_last->node,
+                          sub_last->str_idx, bkref_node, bkref_str,
+                          OP_OPEN_SUBEXP);
+  if (status != REG_NOERROR)
+    return status;
+
+  status = match_ctx_add_entry (mctx, bkref_node, bkref_str,
+                                sub_top->str_idx, sub_last->str_idx);
+  if (BE (status != REG_NOERROR, 0))
+    return status;
+
+  /* End index of the back reference: start plus sub expression length.  */
+  return clean_state_log_if_needed (mctx,
+                                    bkref_str + sub_last->str_idx
+                                    - sub_top->str_idx);
+}
+
+/* Return the first node in NODES whose token type equals TYPE and whose
+   sub-expression index equals SUBEXP_IDX, or -1 when there is none.
+   TODO: This function isn't efficient...
+         Because more than one node may be an OP_OPEN_SUBEXP with index
+         SUBEXP_IDX, all nodes must be checked.
+         E.g. RE: (a){2}  */
+
+static int
+internal_function
+find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
+                  int subexp_idx, int type)
+{
+  int pos = 0;
+
+  while (pos < nodes->nelem)
+    {
+      const int candidate = nodes->elems[pos];
+      const re_token_t *token = dfa->nodes + candidate;
+
+      if (token->type == type && token->opr.idx == subexp_idx)
+        return candidate;
+      ++pos;
+    }
+  return -1;
+}
+
+/* Check whether the node TOP_NODE at TOP_STR can arrive to the node
+   LAST_NODE at LAST_STR.  The path is recorded in PATH since it will be
+   heavily reused; PATH->next_idx remembers how far it has been computed.
+   While the path is being extended, MCTX->state_log and the input cursor
+   are temporarily redirected to PATH.  They are restored on every exit,
+   including the error exits, so MCTX is never left pointing at
+   PATH->array.
+   Return REG_NOERROR if it can arrive, REG_NOMATCH if it cannot, or the
+   error (e.g. REG_ESPACE) reported by a helper.  */
+
+static reg_errcode_t
+internal_function
+check_arrival (re_match_context_t *mctx, state_array_t *path, int top_node,
+               int top_str, int last_node, int last_str, int type)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  reg_errcode_t err = REG_NOERROR;
+  int subexp_num, backup_cur_idx, str_idx, null_cnt;
+  re_dfastate_t *cur_state = NULL;
+  re_node_set *cur_nodes, next_nodes;
+  re_dfastate_t **backup_state_log;
+  unsigned int context;
+
+  subexp_num = dfa->nodes[top_node].opr.idx;
+  /* Extend the buffer if we need.  MCTX has not been touched yet, so a
+     plain return is fine here.  */
+  if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
+    {
+      re_dfastate_t **new_array;
+      int old_alloc = path->alloc;
+      path->alloc += last_str + mctx->max_mb_elem_len + 1;
+      new_array = re_realloc (path->array, re_dfastate_t *, path->alloc);
+      if (BE (new_array == NULL, 0))
+        {
+          path->alloc = old_alloc;
+          return REG_ESPACE;
+        }
+      path->array = new_array;
+      /* Clear only the newly added slots.  */
+      memset (new_array + old_alloc, '\0',
+              sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
+    }
+
+  /* Resume where the previous call on this PATH stopped, if any.  */
+  str_idx = path->next_idx ? path->next_idx : top_str;
+
+  /* Temporary modify MCTX.  From here on, every exit must go through the
+     restore code at the end of the function.  */
+  backup_state_log = mctx->state_log;
+  backup_cur_idx = mctx->input.cur_idx;
+  mctx->state_log = path->array;
+  mctx->input.cur_idx = str_idx;
+
+  /* Setup initial node set.  */
+  context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
+  if (str_idx == top_str)
+    {
+      err = re_node_set_init_1 (&next_nodes, top_node);
+      if (BE (err != REG_NOERROR, 0))
+        goto restore_return;
+      err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
+      if (BE (err != REG_NOERROR, 0))
+        goto free_restore_return;
+    }
+  else
+    {
+      cur_state = mctx->state_log[str_idx];
+      if (cur_state && cur_state->has_backref)
+        {
+          err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
+          if (BE (err != REG_NOERROR, 0))
+            goto restore_return;
+        }
+      else
+        re_node_set_init_empty (&next_nodes);
+    }
+  if (str_idx == top_str || (cur_state && cur_state->has_backref))
+    {
+      if (next_nodes.nelem)
+        {
+          err = expand_bkref_cache (mctx, &next_nodes, str_idx,
+                                    subexp_num, type);
+          if (BE (err != REG_NOERROR, 0))
+            goto free_restore_return;
+        }
+      cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
+      if (BE (cur_state == NULL && err != REG_NOERROR, 0))
+        goto free_restore_return;
+      mctx->state_log[str_idx] = cur_state;
+    }
+
+  /* Advance one index at a time.  Stop at LAST_STR, or after more than
+     MAX_MB_ELEM_LEN consecutive indexes without a state.  */
+  for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
+    {
+      re_node_set_empty (&next_nodes);
+      /* Start from whatever is already logged for the next index.  */
+      if (mctx->state_log[str_idx + 1])
+        {
+          err = re_node_set_merge (&next_nodes,
+                                   &mctx->state_log[str_idx + 1]->nodes);
+          if (BE (err != REG_NOERROR, 0))
+            goto free_restore_return;
+        }
+      if (cur_state)
+        {
+          err = check_arrival_add_next_nodes (mctx, str_idx,
+                                              &cur_state->non_eps_nodes,
+                                              &next_nodes);
+          if (BE (err != REG_NOERROR, 0))
+            goto free_restore_return;
+        }
+      ++str_idx;
+      if (next_nodes.nelem)
+        {
+          err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
+          if (BE (err != REG_NOERROR, 0))
+            goto free_restore_return;
+          err = expand_bkref_cache (mctx, &next_nodes, str_idx,
+                                    subexp_num, type);
+          if (BE (err != REG_NOERROR, 0))
+            goto free_restore_return;
+        }
+      context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
+      cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
+      if (BE (cur_state == NULL && err != REG_NOERROR, 0))
+        goto free_restore_return;
+      mctx->state_log[str_idx] = cur_state;
+      null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
+    }
+  re_node_set_free (&next_nodes);
+  /* Read the result while MCTX->state_log still refers to PATH.  */
+  cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
+               : &mctx->state_log[last_str]->nodes);
+  path->next_idx = str_idx;
+
+  /* Fix MCTX.  */
+  mctx->state_log = backup_state_log;
+  mctx->input.cur_idx = backup_cur_idx;
+
+  /* Then check the current node set has the node LAST_NODE.  */
+  if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
+    return REG_NOERROR;
+
+  return REG_NOMATCH;
+
+  /* Error exits: release the working set if it was initialized, then put
+     MCTX back exactly as it was found.  */
+ free_restore_return:
+  re_node_set_free (&next_nodes);
+ restore_return:
+  mctx->state_log = backup_state_log;
+  mctx->input.cur_idx = backup_cur_idx;
+  return err;
+}
+
+/* Helper functions for check_arrival.  */
+
+/* For every node of CUR_NODES that accepts the input at STR_IDX, insert
+   its successor into NEXT_NODES.  When RE_ENABLE_I18N is defined, a node
+   flagged accept_mb that accepts more than one byte additionally has its
+   successor merged into the state logged at the index where those bytes
+   end.
+   Return REG_NOERROR, REG_ESPACE when an insertion fails, or the error
+   reported by a helper.
+   TODO: This function is similar to the functions transit_state*(),
+         however this function has many additional works.
+         Can't we unify them?  */
+
+static reg_errcode_t
+internal_function
+check_arrival_add_next_nodes (re_match_context_t *mctx, int str_idx,
+                              re_node_set *cur_nodes, re_node_set *next_nodes)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  reg_errcode_t status = REG_NOERROR;
+  re_node_set scratch;
+  int i;
+
+  re_node_set_init_empty (&scratch);
+  for (i = 0; i < cur_nodes->nelem; ++i)
+    {
+      const int node = cur_nodes->elems[i];
+      int nbytes = 0;
+#ifdef DEBUG
+      re_token_type_t type = dfa->nodes[node].type;
+      assert (!IS_EPSILON_NODE (type));
+#endif
+#ifdef RE_ENABLE_I18N
+      /* The node may accept a multi-byte element.  */
+      if (dfa->nodes[node].accept_mb)
+        {
+          nbytes = check_node_accept_bytes (dfa, node, &mctx->input, str_idx);
+          if (nbytes > 1)
+            {
+              const int succ = dfa->nexts[node];
+              const int end_idx = str_idx + nbytes;
+              re_dfastate_t *logged = mctx->state_log[end_idx];
+
+              /* Rebuild the scratch set: logged nodes plus SUCC.  */
+              re_node_set_empty (&scratch);
+              if (logged != NULL)
+                {
+                  status = re_node_set_merge (&scratch, &logged->nodes);
+                  if (BE (status != REG_NOERROR, 0))
+                    goto done;
+                }
+              if (BE (re_node_set_insert (&scratch, succ) < 0, 0))
+                {
+                  status = REG_ESPACE;
+                  goto done;
+                }
+              mctx->state_log[end_idx] = re_acquire_state (&status, dfa,
+                                                           &scratch);
+              if (BE (mctx->state_log[end_idx] == NULL
+                      && status != REG_NOERROR, 0))
+                goto done;
+            }
+        }
+#endif /* RE_ENABLE_I18N */
+      if (nbytes || check_node_accept (mctx, dfa->nodes + node, str_idx))
+        {
+          if (BE (re_node_set_insert (next_nodes, dfa->nexts[node]) < 0, 0))
+            {
+              status = REG_ESPACE;
+              goto done;
+            }
+        }
+    }
+  status = REG_NOERROR;
+ done:
+  /* The scratch set is released on every path.  */
+  re_node_set_free (&scratch);
+  return status;
+}
+
+/* Replace CUR_NODES by the union of the epsilon closures of its nodes.
+   A closure that contains a node of type TYPE with sub-expression index
+   EX_SUBEXP is not merged wholesale; it is rebuilt step by step through
+   check_arrival_expand_ecl_sub instead.
+   On success the old contents of CUR_NODES are freed and REG_NOERROR is
+   returned; on failure CUR_NODES is left untouched and the error is
+   returned.  */
+
+static reg_errcode_t
+internal_function
+check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
+                          int ex_subexp, int type)
+{
+  re_node_set expanded;
+  reg_errcode_t err;
+  int i;
+#ifdef DEBUG
+  assert (cur_nodes->nelem);
+#endif
+  err = re_node_set_alloc (&expanded, cur_nodes->nelem);
+  if (BE (err != REG_NOERROR, 0))
+    return err;
+
+  for (i = 0; i < cur_nodes->nelem; ++i)
+    {
+      const int node = cur_nodes->elems[i];
+      const re_node_set *closure = dfa->eclosures + node;
+
+      if (find_subexp_node (dfa, closure, ex_subexp, type) == -1)
+        /* There are no problematic nodes, just merge them.  */
+        err = re_node_set_merge (&expanded, closure);
+      else
+        /* There are problematic nodes, re-calculate incrementally.  */
+        err = check_arrival_expand_ecl_sub (dfa, &expanded, node,
+                                            ex_subexp, type);
+
+      if (BE (err != REG_NOERROR, 0))
+        {
+          re_node_set_free (&expanded);
+          return err;
+        }
+    }
+
+  /* Hand the new set over to the caller's variable.  */
+  re_node_set_free (cur_nodes);
+  *cur_nodes = expanded;
+  return REG_NOERROR;
+}
+
+/* Helper function for check_arrival_expand_ecl.
+   Starting at TARGET, follow the first epsilon destination of each node
+   (recursing into the second one when a node has two) and insert the
+   visited nodes into DST_NODES.  The walk stops at a node that is already
+   in DST_NODES, at a node without epsilon destinations, or at a node of
+   type TYPE with sub-expression index EX_SUBEXP; that last node is itself
+   inserted only when TYPE is OP_CLOSE_SUBEXP.
+   Return REG_NOERROR, or REG_ESPACE when an insertion fails.  */
+
+static reg_errcode_t
+internal_function
+check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
+                              int target, int ex_subexp, int type)
+{
+  int node = target;
+
+  while (!re_node_set_contains (dst_nodes, node))
+    {
+      const re_token_t *token = dfa->nodes + node;
+      const re_node_set *edges = dfa->edests + node;
+
+      if (token->type == type && token->opr.idx == ex_subexp)
+        {
+          /* Boundary node reached: keep it only for OP_CLOSE_SUBEXP.  */
+          if (type == OP_CLOSE_SUBEXP
+              && BE (re_node_set_insert (dst_nodes, node) == -1, 0))
+            return REG_ESPACE;
+          return REG_NOERROR;
+        }
+
+      if (BE (re_node_set_insert (dst_nodes, node) == -1, 0))
+        return REG_ESPACE;
+
+      if (edges->nelem == 0)
+        return REG_NOERROR;
+
+      if (edges->nelem == 2)
+        {
+          /* Branch: handle the second destination recursively.  */
+          reg_errcode_t sub_err
+            = check_arrival_expand_ecl_sub (dfa, dst_nodes, edges->elems[1],
+                                            ex_subexp, type);
+          if (BE (sub_err != REG_NOERROR, 0))
+            return sub_err;
+        }
+      node = edges->elems[0];
+    }
+  return REG_NOERROR;
+}
+
+
+/* For every back reference cached in MCTX->BKREF_ENTS at string index
+   CUR_STR whose node is in CUR_NODES, add its destination either to
+   CUR_NODES (empty match) or to MCTX->STATE_LOG (non-empty match).  */
+
+static reg_errcode_t
+internal_function
+expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
+		    int cur_str, int subexp_num, int type)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  reg_errcode_t err;
+  int cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
+  struct re_backref_cache_entry *ent;
+  /* -1 means that no back reference is cached for CUR_STR.  */
+  if (cache_idx_start == -1)
+    return REG_NOERROR;
+  /* Entries that share a str_idx are chained through their MORE flag.  */
+ restart:
+  ent = mctx->bkref_ents + cache_idx_start;
+  do
+    {
+      int to_idx, next_node;
+
+      /* Is the entry ENT relevant, i.e. is its node in CUR_NODES?  */
+      if (!re_node_set_contains (cur_nodes, ent->node))
+	continue; /* No.  */
+
+      to_idx = cur_str + ent->subexp_to - ent->subexp_from;
+      /* Calculate the destination of the back reference, and append it
+	 to MCTX->STATE_LOG.  */
+      if (to_idx == cur_str)
+	{
+	  /* The backreference did epsilon transit, we must re-check all the
+	     nodes in the current state.  */
+	  re_node_set new_dests;
+	  reg_errcode_t err2, err3;
+	  next_node = dfa->edests[ent->node].elems[0];
+	  if (re_node_set_contains (cur_nodes, next_node))
+	    continue;
+	  err = re_node_set_init_1 (&new_dests, next_node);
+	  err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
+	  err3 = re_node_set_merge (cur_nodes, &new_dests);
+	  re_node_set_free (&new_dests);
+	  if (BE (err != REG_NOERROR || err2 != REG_NOERROR
+		  || err3 != REG_NOERROR, 0))
+	    {
+	      err = (err != REG_NOERROR ? err
+		     : (err2 != REG_NOERROR ? err2 : err3));
+	      return err;
+	    }
+	  /* TODO: It is still inefficient...  */
+	  goto restart;
+	}
+      else
+	{
+	  re_node_set union_set;
+	  next_node = dfa->nexts[ent->node];
+	  if (mctx->state_log[to_idx])
+	    {
+	      int ret;
+	      if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
+					next_node))
+		continue;
+	      err = re_node_set_init_copy (&union_set,
+					   &mctx->state_log[to_idx]->nodes);
+	      ret = re_node_set_insert (&union_set, next_node);
+	      if (BE (err != REG_NOERROR || ret < 0, 0))
+		{
+		  re_node_set_free (&union_set);
+		  err = err != REG_NOERROR ? err : REG_ESPACE;
+		  return err;
+		}
+	    }
+	  else
+	    {
+	      err = re_node_set_init_1 (&union_set, next_node);
+	      if (BE (err != REG_NOERROR, 0))
+		return err;
+	    }
+	  mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
+	  re_node_set_free (&union_set);
+	  if (BE (mctx->state_log[to_idx] == NULL
+		  && err != REG_NOERROR, 0))
+	    return err;
+	}
+    }
+  while (ent++->more);
+  return REG_NOERROR;
+}
+
+/* Build the transition table for STATE: group its nodes by the bytes they
+   accept, acquire the destination state of each group, and fill
+   STATE->trtable (or STATE->word_trtable when word context matters).
+   Return 1 if succeeded, otherwise return 0.  */
+
+static int
+internal_function
+build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
+{
+  reg_errcode_t err;
+  int i, j, ch, need_word_trtable = 0;
+  bitset_word_t elem, mask;
+  bool dests_node_malloced = false;
+  bool dest_states_malloced = false;
+  int ndests; /* Number of the destination states from `state'.  */
+  re_dfastate_t **trtable;
+  re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
+  re_node_set follows, *dests_node;
+  bitset_t *dests_ch;
+  bitset_t acceptable;
+
+  struct dests_alloc
+  {
+    re_node_set dests_node[SBC_MAX];
+    bitset_t dests_ch[SBC_MAX];
+  } *dests_alloc;
+
+  /* We build DFA states which correspond to the destination nodes
+     from `state'.  `dests_node[i]' represents the nodes which i-th
+     destination state contains, and `dests_ch[i]' represents the
+     characters which i-th destination state accepts.  */
+  if (__libc_use_alloca (sizeof (struct dests_alloc)))
+    dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
+  else
+    {
+      dests_alloc = re_malloc (struct dests_alloc, 1);
+      if (BE (dests_alloc == NULL, 0))
+	return 0;
+      dests_node_malloced = true;
+    }
+  dests_node = dests_alloc->dests_node;
+  dests_ch = dests_alloc->dests_ch;
+
+  /* Initialize transition table.  */
+  state->word_trtable = state->trtable = NULL;
+
+  /* At first, group all nodes belonging to `state' into several
+     destinations.  */
+  ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
+  if (BE (ndests <= 0, 0))
+    {
+      if (dests_node_malloced)
+	free (dests_alloc);
+      if (ndests < 0)
+	return 0;	/* group_nodes_into_DFAstates failed.  */
+      /* No node of `state' accepts a byte: install an all-NULL table.
+	 Report failure when the table itself cannot be allocated, so
+	 that the caller is not handed a NULL `trtable' as a success.  */
+      state->trtable = (re_dfastate_t **)
+	calloc (sizeof (re_dfastate_t *), SBC_MAX);
+      return state->trtable != NULL;
+    }
+
+  err = re_node_set_alloc (&follows, ndests + 1);
+  if (BE (err != REG_NOERROR, 0))
+    goto out_free;
+
+  if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
+			 + ndests * 3 * sizeof (re_dfastate_t *)))
+    dest_states = (re_dfastate_t **)
+      alloca (ndests * 3 * sizeof (re_dfastate_t *));
+  else
+    {
+      dest_states = (re_dfastate_t **)
+	malloc (ndests * 3 * sizeof (re_dfastate_t *));
+      if (BE (dest_states == NULL, 0))
+	{
+out_free:
+	  if (dest_states_malloced)
+	    free (dest_states);
+	  re_node_set_free (&follows);
+	  for (i = 0; i < ndests; ++i)
+	    re_node_set_free (dests_node + i);
+	  if (dests_node_malloced)
+	    free (dests_alloc);
+	  return 0;
+	}
+      dest_states_malloced = true;
+    }
+  dest_states_word = dest_states + ndests;
+  dest_states_nl = dest_states_word + ndests;
+  bitset_empty (acceptable);
+
+  /* Then build the states for all destinations.  */
+  for (i = 0; i < ndests; ++i)
+    {
+      int next_node;
+      re_node_set_empty (&follows);
+      /* Merge the follows of this destination states.  */
+      for (j = 0; j < dests_node[i].nelem; ++j)
+	{
+	  next_node = dfa->nexts[dests_node[i].elems[j]];
+	  if (next_node != -1)
+	    {
+	      err = re_node_set_merge (&follows, dfa->eclosures + next_node);
+	      if (BE (err != REG_NOERROR, 0))
+		goto out_free;
+	    }
+	}
+      dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
+      if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
+	goto out_free;
+      /* If the new state has context constraint,
+	 build appropriate states for these contexts.  */
+      if (dest_states[i]->has_constraint)
+	{
+	  dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
+							  CONTEXT_WORD);
+	  if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
+	    goto out_free;
+
+	  if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
+	    need_word_trtable = 1;
+
+	  dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
+							CONTEXT_NEWLINE);
+	  if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
+	    goto out_free;
+	}
+      else
+	{
+	  dest_states_word[i] = dest_states[i];
+	  dest_states_nl[i] = dest_states[i];
+	}
+      bitset_merge (acceptable, dests_ch[i]);
+    }
+
+  if (!BE (need_word_trtable, 0))
+    {
+      /* We don't care about whether the following character is a word
+	 character, or we are in a single-byte character set so we can
+	 discern by looking at the character code: allocate a
+	 256-entry transition table.  */
+      trtable = state->trtable =
+	(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
+      if (BE (trtable == NULL, 0))
+	goto out_free;
+
+      /* For all characters ch...:  */
+      for (i = 0; i < BITSET_WORDS; ++i)
+	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
+	     elem;
+	     mask <<= 1, elem >>= 1, ++ch)
+	  if (BE (elem & 1, 0))
+	    {
+	      /* There must be exactly one destination which accepts
+		 character ch.  See group_nodes_into_DFAstates.  */
+	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
+		;
+
+	      /* j-th destination accepts the word character ch.  */
+	      if (dfa->word_char[i] & mask)
+		trtable[ch] = dest_states_word[j];
+	      else
+		trtable[ch] = dest_states[j];
+	    }
+    }
+  else
+    {
+      /* We care about whether the following character is a word
+	 character, and we are in a multi-byte character set: discern
+	 by looking at the character code: build two 256-entry
+	 transition tables, one starting at trtable[0] and one
+	 starting at trtable[SBC_MAX].  */
+      trtable = state->word_trtable =
+	(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
+      if (BE (trtable == NULL, 0))
+	goto out_free;
+
+      /* For all characters ch...:  */
+      for (i = 0; i < BITSET_WORDS; ++i)
+	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
+	     elem;
+	     mask <<= 1, elem >>= 1, ++ch)
+	  if (BE (elem & 1, 0))
+	    {
+	      /* There must be exactly one destination which accepts
+		 character ch.  See group_nodes_into_DFAstates.  */
+	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
+		;
+
+	      /* j-th destination accepts the word character ch.  */
+	      trtable[ch] = dest_states[j];
+	      trtable[ch + SBC_MAX] = dest_states_word[j];
+	    }
+    }
+
+  /* new line */
+  if (bitset_contain (acceptable, NEWLINE_CHAR))
+    {
+      /* The current state accepts newline character.  */
+      for (j = 0; j < ndests; ++j)
+	if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
+	  {
+	    /* j-th destination accepts newline character.  */
+	    trtable[NEWLINE_CHAR] = dest_states_nl[j];
+	    if (need_word_trtable)
+	      trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
+	    /* There must be only one destination which accepts
+	       newline.  See group_nodes_into_DFAstates.  */
+	    break;
+	  }
+    }
+
+  if (dest_states_malloced)
+    free (dest_states);
+  re_node_set_free (&follows);
+  for (i = 0; i < ndests; ++i)
+    re_node_set_free (dests_node + i);
+  if (dests_node_malloced)
+    free (dests_alloc);
+
+  return 1;
+}
+
+/* Group all nodes belonging to STATE into several destinations.
+   Then for all destinations, set the nodes belonging to the destination
+   to DESTS_NODE[i] and set the characters accepted by the destination
+   to DESTS_CH[i].  Return the number of destinations, or -1 on error.  */
+
+static int
+internal_function
+group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
+			    re_node_set *dests_node, bitset_t *dests_ch)
+{
+  reg_errcode_t err;
+  int result;
+  int i, j, k;
+  int ndests; /* Number of the destinations from `state'.  */
+  bitset_t accepts; /* Characters a node can accept.  */
+  const re_node_set *cur_nodes = &state->nodes;
+  bitset_empty (accepts);
+  ndests = 0;
+
+  /* For all the nodes belonging to `state',  */
+  for (i = 0; i < cur_nodes->nelem; ++i)
+    {
+      re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
+      re_token_type_t type = node->type;
+      unsigned int constraint = node->constraint;
+
+      /* Enumerate all single byte character this node can accept.  */
+      if (type == CHARACTER)
+	bitset_set (accepts, node->opr.c);
+      else if (type == SIMPLE_BRACKET)
+	{
+	  bitset_merge (accepts, node->opr.sbcset);
+	}
+      else if (type == OP_PERIOD)
+	{
+#ifdef RE_ENABLE_I18N
+	  if (dfa->mb_cur_max > 1)
+	    bitset_merge (accepts, dfa->sb_char);
+	  else
+#endif
+	    bitset_set_all (accepts);
+	  if (!(dfa->syntax & RE_DOT_NEWLINE))
+	    bitset_clear (accepts, '\n');
+	  if (dfa->syntax & RE_DOT_NOT_NULL)
+	    bitset_clear (accepts, '\0');
+	}
+#ifdef RE_ENABLE_I18N
+      else if (type == OP_UTF8_PERIOD)
+        {
+	  memset (accepts, '\xff', sizeof (bitset_t) / 2);
+	  if (!(dfa->syntax & RE_DOT_NEWLINE))
+	    bitset_clear (accepts, '\n');
+	  if (dfa->syntax & RE_DOT_NOT_NULL)
+	    bitset_clear (accepts, '\0');
+        }
+#endif
+      else
+	continue;
+
+      /* Check `accepts' against the node's constraint and sift out the
+	 characters that cannot match in that context.  */
+      if (constraint)
+	{
+	  if (constraint & NEXT_NEWLINE_CONSTRAINT)
+	    {
+	      bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
+	      bitset_empty (accepts);
+	      if (accepts_newline)
+		bitset_set (accepts, NEWLINE_CHAR);
+	      else
+		continue;
+	    }
+	  if (constraint & NEXT_ENDBUF_CONSTRAINT)
+	    {
+	      bitset_empty (accepts);
+	      continue;
+	    }
+
+	  if (constraint & NEXT_WORD_CONSTRAINT)
+	    {
+	      bitset_word_t any_set = 0;
+	      if (type == CHARACTER && !node->word_char)
+		{
+		  bitset_empty (accepts);
+		  continue;
+		}
+#ifdef RE_ENABLE_I18N
+	      if (dfa->mb_cur_max > 1)
+		for (j = 0; j < BITSET_WORDS; ++j)
+		  any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
+	      else
+#endif
+		for (j = 0; j < BITSET_WORDS; ++j)
+		  any_set |= (accepts[j] &= dfa->word_char[j]);
+	      if (!any_set)
+		continue;
+	    }
+	  if (constraint & NEXT_NOTWORD_CONSTRAINT)
+	    {
+	      bitset_word_t any_set = 0;
+	      if (type == CHARACTER && node->word_char)
+		{
+		  bitset_empty (accepts);
+		  continue;
+		}
+#ifdef RE_ENABLE_I18N
+	      if (dfa->mb_cur_max > 1)
+		for (j = 0; j < BITSET_WORDS; ++j)
+		  any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
+	      else
+#endif
+		for (j = 0; j < BITSET_WORDS; ++j)
+		  any_set |= (accepts[j] &= ~dfa->word_char[j]);
+	      if (!any_set)
+		continue;
+	    }
+	}
+
+      /* Then divide `accepts' into DFA states, or create a new
+	 state.  Above, we make sure that accepts is not empty.  */
+      for (j = 0; j < ndests; ++j)
+	{
+	  bitset_t intersec; /* Intersection sets, see below.  */
+	  bitset_t remains;
+	  /* Flags, see below.  */
+	  bitset_word_t has_intersec, not_subset, not_consumed;
+
+	  /* Optimization, skip if this state doesn't accept the character.  */
+	  if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
+	    continue;
+
+	  /* Enumerate the intersection set of this state and `accepts'.  */
+	  has_intersec = 0;
+	  for (k = 0; k < BITSET_WORDS; ++k)
+	    has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
+	  /* And skip if the intersection set is empty.  */
+	  if (!has_intersec)
+	    continue;
+
+	  /* Then check if this state is a subset of `accepts'.  */
+	  not_subset = not_consumed = 0;
+	  for (k = 0; k < BITSET_WORDS; ++k)
+	    {
+	      not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
+	      not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
+	    }
+
+	  /* If this state isn't a subset of `accepts', create a
+	     new group state, which has the `remains'. */
+	  if (not_subset)
+	    {
+	      bitset_copy (dests_ch[ndests], remains);
+	      bitset_copy (dests_ch[j], intersec);
+	      err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
+	      if (BE (err != REG_NOERROR, 0))
+		goto error_return;
+	      ++ndests;
+	    }
+
+	  /* Put the position in the current group. */
+	  result = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
+	  if (BE (result < 0, 0))
+	    goto error_return;
+
+	  /* If all characters are consumed, go to next node. */
+	  if (!not_consumed)
+	    break;
+	}
+      /* Some characters remain, create a new group. */
+      if (j == ndests)
+	{
+	  bitset_copy (dests_ch[ndests], accepts);
+	  err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
+	  if (BE (err != REG_NOERROR, 0))
+	    goto error_return;
+	  ++ndests;
+	  bitset_empty (accepts);
+	}
+    }
+  return ndests;
+ error_return:
+  for (j = 0; j < ndests; ++j)
+    re_node_set_free (dests_node + j);
+  return -1;
+}
+
+#ifdef RE_ENABLE_I18N
+/* Check how many bytes the node `dfa->nodes[node_idx]' accepts at
+   index STR_IDX of INPUT.  Return that number of bytes, or 0 if the
+   node does not accept the input there.
+
+   This function handles the nodes which can accept one multibyte
+   character or one collating element, like '.' and '[a-z]', as opposed
+   to the other nodes, which can only accept one byte.  */
+
+static int
+internal_function
+check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
+			 const re_string_t *input, int str_idx)
+{
+  const re_token_t *node = dfa->nodes + node_idx;
+  int char_len, elem_len;
+  int i;
+
+  if (BE (node->type == OP_UTF8_PERIOD, 0))
+    {
+      unsigned char c = re_string_byte_at (input, str_idx), d;
+      if (BE (c < 0xc2, 1))
+	return 0;
+
+      if (str_idx + 2 > input->len)
+	return 0;
+
+      d = re_string_byte_at (input, str_idx + 1);
+      if (c < 0xe0)
+	return (d < 0x80 || d > 0xbf) ? 0 : 2;
+      else if (c < 0xf0)
+	{
+	  char_len = 3;
+	  if (c == 0xe0 && d < 0xa0)
+	    return 0;
+	}
+      else if (c < 0xf8)
+	{
+	  char_len = 4;
+	  if (c == 0xf0 && d < 0x90)
+	    return 0;
+	}
+      else if (c < 0xfc)
+	{
+	  char_len = 5;
+	  if (c == 0xf8 && d < 0x88)
+	    return 0;
+	}
+      else if (c < 0xfe)
+	{
+	  char_len = 6;
+	  if (c == 0xfc && d < 0x84)
+	    return 0;
+	}
+      else
+	return 0;
+
+      if (str_idx + char_len > input->len)
+	return 0;
+
+      for (i = 1; i < char_len; ++i)
+	{
+	  d = re_string_byte_at (input, str_idx + i);
+	  if (d < 0x80 || d > 0xbf)
+	    return 0;
+	}
+      return char_len;
+    }
+
+  char_len = re_string_char_size_at (input, str_idx);
+  if (node->type == OP_PERIOD)
+    {
+      if (char_len <= 1)
+        return 0;
+      /* FIXME: I don't think this if is needed, as both '\n'
+	 and '\0' are char_len == 1.  */
+      /* '.' accepts any one character except the following two cases.  */
+      if ((!(dfa->syntax & RE_DOT_NEWLINE) &&
+	   re_string_byte_at (input, str_idx) == '\n') ||
+	  ((dfa->syntax & RE_DOT_NOT_NULL) &&
+	   re_string_byte_at (input, str_idx) == '\0'))
+	return 0;
+      return char_len;
+    }
+
+  elem_len = re_string_elem_size_at (input, str_idx);
+  if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
+    return 0;
+
+  if (node->type == COMPLEX_BRACKET)
+    {
+      const re_charset_t *cset = node->opr.mbcset;
+# ifdef _LIBC
+      const unsigned char *pin
+	= ((const unsigned char *) re_string_get_buffer (input) + str_idx);
+      int j;
+      uint32_t nrules;
+# endif /* _LIBC */
+      int match_len = 0;
+      wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
+		    ? re_string_wchar_at (input, str_idx) : 0);
+
+      /* match with multibyte character?  */
+      for (i = 0; i < cset->nmbchars; ++i)
+	if (wc == cset->mbchars[i])
+	  {
+	    match_len = char_len;
+	    goto check_node_accept_bytes_match;
+	  }
+      /* match with character_class?  */
+      for (i = 0; i < cset->nchar_classes; ++i)
+	{
+	  wctype_t wt = cset->char_classes[i];
+	  if (__iswctype (wc, wt))
+	    {
+	      match_len = char_len;
+	      goto check_node_accept_bytes_match;
+	    }
+	}
+
+# ifdef _LIBC
+      nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+      if (nrules != 0)
+	{
+	  unsigned int in_collseq = 0;
+	  const int32_t *table, *indirect;
+	  const unsigned char *weights, *extra;
+	  const char *collseqwc;
+	  int32_t idx;
+	  /* This #include defines a local function!  */
+#  include <locale/weight.h>
+
+	  /* match with collating_symbol?  */
+	  if (cset->ncoll_syms)
+	    extra = (const unsigned char *)
+	      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
+	  for (i = 0; i < cset->ncoll_syms; ++i)
+	    {
+	      const unsigned char *coll_sym = extra + cset->coll_syms[i];
+	      /* Compare the length of input collating element and
+		 the length of current collating element.  */
+	      if (*coll_sym != elem_len)
+		continue;
+	      /* Compare each byte.  */
+	      for (j = 0; j < *coll_sym; j++)
+		if (pin[j] != coll_sym[1 + j])
+		  break;
+	      if (j == *coll_sym)
+		{
+		  /* Match if every byte is equal.  */
+		  match_len = j;
+		  goto check_node_accept_bytes_match;
+		}
+	    }
+
+	  if (cset->nranges)
+	    {
+	      if (elem_len <= char_len)
+		{
+		  collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
+		  in_collseq = __collseq_table_lookup (collseqwc, wc);
+		}
+	      else
+		in_collseq = find_collation_sequence_value (pin, elem_len);
+	    }
+	  /* match with range expression?  */
+	  for (i = 0; i < cset->nranges; ++i)
+	    if (cset->range_starts[i] <= in_collseq
+		&& in_collseq <= cset->range_ends[i])
+	      {
+		match_len = elem_len;
+		goto check_node_accept_bytes_match;
+	      }
+
+	  /* match with equivalence_class?  */
+	  if (cset->nequiv_classes)
+	    {
+	      const unsigned char *cp = pin;
+	      table = (const int32_t *)
+		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
+	      weights = (const unsigned char *)
+		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
+	      extra = (const unsigned char *)
+		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
+	      indirect = (const int32_t *)
+		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
+	      idx = findidx (&cp);
+	      if (idx > 0)
+		for (i = 0; i < cset->nequiv_classes; ++i)
+		  {
+		    int32_t equiv_class_idx = cset->equiv_classes[i];
+		    size_t weight_len = weights[idx];
+		    if (weight_len == weights[equiv_class_idx])
+		      {
+			int cnt = 0;
+			while (cnt <= weight_len
+			       && (weights[equiv_class_idx + 1 + cnt]
+				   == weights[idx + 1 + cnt]))
+			  ++cnt;
+			if (cnt > weight_len)
+			  {
+			    match_len = elem_len;
+			    goto check_node_accept_bytes_match;
+			  }
+		      }
+		  }
+	    }
+	}
+      else
+# endif /* _LIBC */
+	{
+	  /* match with range expression?  */
+#if __GNUC__ >= 2
+	  wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
+#else
+	  wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
+	  cmp_buf[2] = wc;
+#endif
+	  for (i = 0; i < cset->nranges; ++i)
+	    {
+	      cmp_buf[0] = cset->range_starts[i];
+	      cmp_buf[4] = cset->range_ends[i];
+	      if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
+		  && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
+		{
+		  match_len = char_len;
+		  goto check_node_accept_bytes_match;
+		}
+	    }
+	}
+    check_node_accept_bytes_match:
+      if (!cset->non_match)
+	return match_len;
+      else
+	{
+	  if (match_len > 0)
+	    return 0;
+	  else
+	    return (elem_len > char_len) ? elem_len : char_len;
+	}
+    }
+  return 0;
+}
+
+# ifdef _LIBC
+static unsigned int
+internal_function
+find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
+{
+  uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+  if (nrules == 0)
+    {
+      if (mbs_len == 1)
+	{
+	  /* No valid character.  Match it as a single byte character.  */
+	  const unsigned char *collseq = (const unsigned char *)
+	    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
+	  return collseq[mbs[0]];
+	}
+      return UINT_MAX;
+    }
+  else
+    {
+      int32_t idx;
+      const unsigned char *extra = (const unsigned char *)
+	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
+      int32_t extrasize = (const unsigned char *)
+	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
+      /* Walk the packed entries, comparing each byte sequence with MBS.  */
+      for (idx = 0; idx < extrasize;)
+	{
+	  int mbs_cnt, found = 0;
+	  int32_t elem_mbs_len;
+	  /* Skip the name of the collating element.  */
+	  idx = idx + extra[idx] + 1;
+	  elem_mbs_len = extra[idx++];
+	  if (mbs_len == elem_mbs_len)
+	    {
+	      for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
+		if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
+		  break;
+	      if (mbs_cnt == elem_mbs_len)
+		/* Found the entry.  */
+		found = 1;
+	    }
+	  /* Skip the byte sequence of the collating element.  */
+	  idx += elem_mbs_len;
+	  /* Adjust for the alignment.  */
+	  idx = (idx + 3) & ~3;
+	  /* Skip the collation sequence value.  */
+	  idx += sizeof (uint32_t);
+	  /* Skip the wide char sequence of the collating element.  */
+	  idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
+	  /* If we found the entry, return the sequence value.  */
+	  if (found)
+	    return *(uint32_t *) (extra + idx);
+	  /* Skip the collation sequence value.  */
+	  idx += sizeof (uint32_t);
+	}
+      return UINT_MAX;	/* No entry matched MBS.  */
+    }
+}
+# endif /* _LIBC */
+#endif /* RE_ENABLE_I18N */
+
+/* Return 1 if NODE accepts the byte at index IDX of MCTX->input and
+   the context found at that index does not violate NODE's constraint.
+   Return 0 otherwise.  */
+
+static int
+internal_function
+check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
+		   int idx)
+{
+  const unsigned char byte = re_string_byte_at (&mctx->input, idx);
+  unsigned int context;
+
+  switch (node->type)
+    {
+    case CHARACTER:
+      if (byte != node->opr.c)
+	return 0;
+      break;
+
+    case SIMPLE_BRACKET:
+      if (!bitset_contain (node->opr.sbcset, byte))
+	return 0;
+      break;
+
+#ifdef RE_ENABLE_I18N
+    case OP_UTF8_PERIOD:
+      /* Bytes with the high bit set are not accepted here.  */
+      if (byte >= 0x80)
+	return 0;
+      /* FALLTHROUGH */
+#endif
+    case OP_PERIOD:
+      /* '.' rejects newline and NUL when the syntax bits say so.  */
+      if (byte == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
+	return 0;
+      if (byte == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL))
+	return 0;
+      break;
+
+    default:
+      return 0;
+    }
+
+  /* A node without constraint needs no look at the context.  */
+  if (!node->constraint)
+    return 1;
+
+  context = re_string_context_at (&mctx->input, idx, mctx->eflags);
+  return NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context) ? 0 : 1;
+}
+
+/* Extend the buffers of MCTX once they have run out: double the
+   length of the input buffers, resize the state log (when there is
+   one) accordingly, and rebuild the buffer contents.  Return
+   REG_NOERROR on success, otherwise the error that occurred.  */
+
+static reg_errcode_t
+internal_function
+extend_buffers (re_match_context_t *mctx)
+{
+  re_string_t *const input = &mctx->input;
+  reg_errcode_t err;
+
+  /* Double the lengths of the buffers.  */
+  err = re_string_realloc_buffers (input, input->bufs_len * 2);
+  if (BE (err != REG_NOERROR, 0))
+    return err;
+
+  if (mctx->state_log != NULL)
+    {
+      /* Resize state_log to the new buffer length plus one entry.  */
+      /* XXX We have no indication of the size of this buffer.  If this
+	 allocation fail we have no indication that the state_log array
+	 does not have the right size.  */
+      re_dfastate_t **grown = re_realloc (mctx->state_log, re_dfastate_t *,
+					  input->bufs_len + 1);
+      if (BE (grown == NULL, 0))
+	return REG_ESPACE;
+      mctx->state_log = grown;
+    }
+
+  /* Then reconstruct the buffers.  */
+#ifdef RE_ENABLE_I18N
+  if (input->mb_cur_max > 1)
+    {
+      /* mb_cur_max > 1: use the wide-character builders.  */
+      if (input->icase)
+	{
+	  err = build_wcs_upper_buffer (input);
+	  if (BE (err != REG_NOERROR, 0))
+	    return err;
+	}
+      else
+	build_wcs_buffer (input);
+      return REG_NOERROR;
+    }
+#endif /* RE_ENABLE_I18N  */
+
+  /* Otherwise upper-case the buffer, or translate it if a translation
+     table is set; with neither there is nothing to rebuild.  */
+  if (input->icase)
+    build_upper_buffer (input);
+  else if (input->trans != NULL)
+    re_string_translate_buffer (input);
+  return REG_NOERROR;
+}
+
+
+/* Functions for matching context.  */
+
+/* Initialize MCTX with the flags EFLAGS, allocating room for N entries
+   in both the back-reference cache and the sub_tops array if N > 0.  */
+
+static reg_errcode_t
+internal_function
+match_ctx_init (re_match_context_t *mctx, int eflags, int n)
+{
+  mctx->eflags = eflags;
+  mctx->match_last = -1;
+  /* With N <= 0 the arrays stay as the caller zeroed them, and so do
+     the counters nbkref_ents and nsub_tops.  */
+  if (n > 0)
+    {
+      struct re_backref_cache_entry *ents
+	= re_malloc (struct re_backref_cache_entry, n);
+      re_sub_match_top_t **tops = re_malloc (re_sub_match_top_t *, n);
+      mctx->bkref_ents = ents;
+      mctx->sub_tops = tops;
+      if (BE (ents == NULL || tops == NULL, 0))
+	return REG_ESPACE;
+    }
+  mctx->abkref_ents = mctx->asub_tops = n;
+  mctx->max_mb_elem_len = 1;
+  return REG_NOERROR;
+}
+
+/* Release every sub-match record held in MCTX->sub_tops and reset the
+   sub-match and back-reference counters to zero.  This function must be
+   invoked when the matcher changes the start index of the input, or
+   changes the input string.  */
+
+static void
+internal_function
+match_ctx_clean (re_match_context_t *mctx)
+{
+  int top_idx = 0;
+  while (top_idx < mctx->nsub_tops)
+    {
+      re_sub_match_top_t *top = mctx->sub_tops[top_idx++];
+      int last_idx;
+      /* Free each `last' record together with its path array.  */
+      for (last_idx = 0; last_idx < top->nlasts; ++last_idx)
+	{
+	  re_free (top->lasts[last_idx]->path.array);
+	  re_free (top->lasts[last_idx]);
+	}
+      re_free (top->lasts);
+      if (top->path != NULL)
+	{
+	  re_free (top->path->array);
+	  re_free (top->path);
+	}
+      free (top);
+    }
+  mctx->nbkref_ents = 0;
+  mctx->nsub_tops = 0;
+}
+
+/* Free the sub-match records and both arrays owned by MCTX.  */
+
+static void
+internal_function
+match_ctx_free (re_match_context_t *mctx)
+{
+  /* The records in SUB_TOPS go first, then the arrays themselves.  */
+  match_ctx_clean (mctx);
+  re_free (mctx->bkref_ents);
+  re_free (mctx->sub_tops);
+}
+
+/* Add a new backreference entry to MCTX, recording NODE, STR_IDX and the
+   sub-expression bounds FROM and TO.  Return REG_ESPACE if the cache
+   cannot be enlarged, REG_NOERROR otherwise.
+   Note that we assume that the caller never calls this function with a
+   duplicate entry, nor with STR_IDX smaller than any existing entry's.  */
+
+static reg_errcode_t
+internal_function
+match_ctx_add_entry (re_match_context_t *mctx, int node, int str_idx, int from,
+		     int to)
+{
+  if (mctx->nbkref_ents >= mctx->abkref_ents)
+    {
+      struct re_backref_cache_entry* new_entry;
+      new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
+			      mctx->abkref_ents * 2);
+      /* On failure the old array is still valid and owned by MCTX; do
+	 not free it here, it is released by match_ctx_free.  */
+      if (BE (new_entry == NULL, 0))
+	return REG_ESPACE;
+      mctx->bkref_ents = new_entry;
+      memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
+	      sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
+      mctx->abkref_ents *= 2;
+    }
+  if (mctx->nbkref_ents > 0
+      && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
+    mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
+
+  mctx->bkref_ents[mctx->nbkref_ents].node = node;
+  mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
+  mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
+  mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
+
+  /* This is a cache that saves negative results of check_dst_limits_calc_pos.
+     If bit N is clear, means that this entry won't epsilon-transition to
+     an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression.  If
+     it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
+     such node.
+
+     A backreference does not epsilon-transition unless it is empty, so set
+     to all zeros if FROM != TO.  */
+  mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
+    = (from == to ? ~0 : 0);
+
+  mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
+  if (mctx->max_mb_elem_len < to - from)
+    mctx->max_mb_elem_len = to - from;
+  return REG_NOERROR;
+}
+
+/* Binary-search MCTX->BKREF_ENTS (sorted by str_idx) for the first entry
+   whose str_idx equals STR_IDX; return its index, or -1 if none is found.  */
+
+static int
+internal_function
+search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx)
+{
+  const int count = mctx->nbkref_ents;
+  int lo = 0, hi = count;
+  /* Narrow [lo, hi) down to the lowest index whose str_idx >= STR_IDX.  */
+  while (lo < hi)
+    {
+      const int mid = (lo + hi) / 2;
+      if (mctx->bkref_ents[mid].str_idx >= str_idx)
+	hi = mid;
+      else
+	lo = mid + 1;
+    }
+  if (lo >= count || mctx->bkref_ents[lo].str_idx != str_idx)
+    return -1;
+  return lo;
+}
+
+/* Register NODE, an OP_OPEN_SUBEXP node which matched at STR_IDX, in MCTX.  */
+
+static reg_errcode_t
+internal_function
+match_ctx_add_subtop (re_match_context_t *mctx, int node, int str_idx)
+{
+  re_sub_match_top_t *top;
+#ifdef DEBUG
+  assert (mctx->sub_tops != NULL);
+  assert (mctx->asub_tops > 0);
+#endif
+  if (BE (mctx->nsub_tops == mctx->asub_tops, 0))
+    {
+      const int doubled = mctx->asub_tops * 2;
+      re_sub_match_top_t **grown = re_realloc (mctx->sub_tops,
+					       re_sub_match_top_t *, doubled);
+      if (BE (grown == NULL, 0))
+	return REG_ESPACE;
+      mctx->asub_tops = doubled;
+      mctx->sub_tops = grown;
+    }
+  top = mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof *top);
+  if (BE (top == NULL, 0))
+    return REG_ESPACE;
+  top->node = node;
+  top->str_idx = str_idx;
+  ++mctx->nsub_tops;
+  return REG_NOERROR;
+}
+
+/* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
+   at STR_IDX, as a new `last' of SUBTOP, its corresponding OP_OPEN_SUBEXP
+   record.  Return the new entry, or NULL if memory is exhausted.  */
+
+static re_sub_match_last_t *
+internal_function
+match_ctx_add_sublast (re_sub_match_top_t *subtop, int node, int str_idx)
+{
+  re_sub_match_last_t *last;
+  if (BE (subtop->nlasts == subtop->alasts, 0))
+    {
+      /* Every slot is in use: grow to 2 * alasts + 1 slots.  */
+      const int capacity = 2 * subtop->alasts + 1;
+      re_sub_match_last_t **grown = re_realloc (subtop->lasts,
+						re_sub_match_last_t *,
+						capacity);
+      if (BE (grown == NULL, 0))
+	return NULL;
+      subtop->alasts = capacity;
+      subtop->lasts = grown;
+    }
+  last = calloc (1, sizeof (re_sub_match_last_t));
+  if (BE (last == NULL, 0))
+    return NULL;
+  last->node = node;
+  last->str_idx = str_idx;
+  subtop->lasts[subtop->nlasts++] = last;
+  return last;
+}
+
+static void
+internal_function
+sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
+	       re_dfastate_t **limited_sts, int last_node, int last_str_idx)
+{
+  re_node_set_init_empty (&sctx->limits);	/* Start with no limits.  */
+  sctx->last_str_idx = last_str_idx;
+  sctx->last_node = last_node;
+  sctx->limited_states = limited_sts;
+  sctx->sifted_states = sifted_sts;
+}
+
+
+/* Binary backward compatibility.  */
+#if _LIBC
+# include <shlib-compat.h>
+# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3)
+link_warning (re_max_failures, "the 're_max_failures' variable is obsolete and will go away.")
+int re_max_failures = 2000;
+# endif
+#endif
+#endif
diff --git a/gkregex.h b/gkregex.h
new file mode 100644
index 0000000..807c404
--- /dev/null
+++ b/gkregex.h
@@ -0,0 +1,556 @@
+/* Definitions for data structures and routines for the regular
+   expression library.
+   Copyright (C) 1985,1989-93,1995-98,2000,2001,2002,2003,2005,2006
+   Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#ifndef _REGEX_H
+#define _REGEX_H 1
+
+#include <sys/types.h>
+
+/* Allow the use in C++ code.  */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* The following two types have to be signed and unsigned integer type
+   wide enough to hold a value of a pointer.  For most ANSI compilers
+   ptrdiff_t and size_t should be likely OK.  Still size of these two
+   types is 2 for Microsoft C.  Ugh... */
+typedef long int s_reg_t;
+typedef unsigned long int active_reg_t;
+
+/* The following bits are used to determine the regexp syntax we
+   recognize.  The set/not-set meanings are chosen so that Emacs syntax
+   remains the value 0.  The bits are given in alphabetical order, and
+   the definitions shifted by one from the previous bit; thus, when we
+   add or remove a bit, only one other definition need change.  */
+typedef unsigned long int reg_syntax_t;
+
+/* If this bit is not set, then \ inside a bracket expression is literal.
+   If set, then such a \ quotes the following character.  */
+#define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1)
+
+/* If this bit is not set, then + and ? are operators, and \+ and \? are
+     literals.
+   If set, then \+ and \? are operators and + and ? are literals.  */
+#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
+
+/* If this bit is set, then character classes are supported.  They are:
+     [:alpha:], [:upper:], [:lower:],  [:digit:], [:alnum:], [:xdigit:],
+     [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
+   If not set, then character classes are not supported.  */
+#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
+
+/* If this bit is set, then ^ and $ are always anchors (outside bracket
+     expressions, of course).
+   If this bit is not set, then it depends:
+        ^  is an anchor if it is at the beginning of a regular
+           expression or after an open-group or an alternation operator;
+        $  is an anchor if it is at the end of a regular expression, or
+           before a close-group or an alternation operator.
+
+   This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
+   POSIX draft 11.2 says that * etc. in leading positions is undefined.
+   We already implemented a previous draft which made those constructs
+   invalid, though, so we haven't changed the code back.  */
+#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
+
+/* If this bit is set, then special characters are always special
+     regardless of where they are in the pattern.
+   If this bit is not set, then special characters are special only in
+     some contexts; otherwise they are ordinary.  Specifically,
+     * + ? and intervals are only special when not after the beginning,
+     open-group, or alternation operator.  */
+#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
+
+/* If this bit is set, then *, +, ?, and { cannot be first in an re or
+     immediately after an alternation or begin-group operator.  */
+#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
+
+/* If this bit is set, then . matches newline.
+   If not set, then it doesn't.  */
+#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
+
+/* If this bit is set, then . doesn't match NUL.
+   If not set, then it does.  */
+#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
+
+/* If this bit is set, nonmatching lists [^...] do not match newline.
+   If not set, they do.  */
+#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
+
+/* If this bit is set, either \{...\} or {...} defines an
+     interval, depending on RE_NO_BK_BRACES.
+   If not set, \{, \}, {, and } are literals.  */
+#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
+
+/* If this bit is set, +, ? and | aren't recognized as operators.
+   If not set, they are.  */
+#define RE_LIMITED_OPS (RE_INTERVALS << 1)
+
+/* If this bit is set, newline is an alternation operator.
+   If not set, newline is literal.  */
+#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
+
+/* If this bit is set, then `{...}' defines an interval, and \{ and \}
+     are literals.
+  If not set, then `\{...\}' defines an interval.  */
+#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
+
+/* If this bit is set, (...) defines a group, and \( and \) are literals.
+   If not set, \(...\) defines a group, and ( and ) are literals.  */
+#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
+
+/* If this bit is set, then \<digit> matches <digit>.
+   If not set, then \<digit> is a back-reference.  */
+#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
+
+/* If this bit is set, then | is an alternation operator, and \| is literal.
+   If not set, then \| is an alternation operator, and | is literal.  */
+#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
+
+/* If this bit is set, then an ending range point collating higher
+     than the starting range point, as in [z-a], is invalid.
+   If not set, then when ending range point collates higher than the
+     starting range point, the range is ignored.  */
+#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
+
+/* If this bit is set, then an unmatched ) is ordinary.
+   If not set, then an unmatched ) is invalid.  */
+#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
+
+/* If this bit is set, succeed as soon as we match the whole pattern,
+   without further backtracking.  */
+#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)
+
+/* If this bit is set, do not process the GNU regex operators.
+   If not set, then the GNU regex operators are recognized. */
+#define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)
+
+/* If this bit is set, turn on internal regex debugging.
+   If not set, and debugging was on, turn it off.
+   This only works if regex.c is compiled -DDEBUG.
+   We define this bit always, so that all that's needed to turn on
+   debugging is to recompile regex.c; the calling code can always have
+   this bit set, and it won't affect anything in the normal case. */
+#define RE_DEBUG (RE_NO_GNU_OPS << 1)
+
+/* If this bit is set, a syntactically invalid interval is treated as
+   a string of ordinary characters.  For example, the ERE 'a{1' is
+   treated as 'a\{1'.  */
+#define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1)
+
+/* If this bit is set, then ignore case when matching.
+   If not set, then case is significant.  */
+#define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1)
+
+/* This bit is used internally like RE_CONTEXT_INDEP_ANCHORS but only
+   for ^, because it is difficult to scan the regex backwards to find
+   whether ^ should be special.  */
+#define RE_CARET_ANCHORS_HERE (RE_ICASE << 1)
+
+/* If this bit is set, then \{ cannot be first in a BRE or
+   immediately after an alternation or begin-group operator.  */
+#define RE_CONTEXT_INVALID_DUP (RE_CARET_ANCHORS_HERE << 1)
+
+/* If this bit is set, then no_sub will be set to 1 during
+   re_compile_pattern.  */
+#define RE_NO_SUB (RE_CONTEXT_INVALID_DUP << 1)
+
+/* This global variable defines the particular regexp syntax to use (for
+   some interfaces).  When a regexp is compiled, the syntax used is
+   stored in the pattern buffer, so changing this does not affect
+   already-compiled regexps.  */
+extern reg_syntax_t re_syntax_options;
+
+/* Define combinations of the above bits for the standard possibilities.
+   (The [[[ comments delimit what gets put into the Texinfo file, so
+   don't delete them!)  */
+/* [[[begin syntaxes]]] */
+#define RE_SYNTAX_EMACS 0
+
+#define RE_SYNTAX_AWK							\
+  (RE_BACKSLASH_ESCAPE_IN_LISTS   | RE_DOT_NOT_NULL			\
+   | RE_NO_BK_PARENS              | RE_NO_BK_REFS			\
+   | RE_NO_BK_VBAR                | RE_NO_EMPTY_RANGES			\
+   | RE_DOT_NEWLINE		  | RE_CONTEXT_INDEP_ANCHORS		\
+   | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)
+
+#define RE_SYNTAX_GNU_AWK						\
+  ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG)	\
+   & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS		\
+       | RE_CONTEXT_INVALID_OPS ))
+
+#define RE_SYNTAX_POSIX_AWK						\
+  (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS		\
+   | RE_INTERVALS	    | RE_NO_GNU_OPS)
+
+#define RE_SYNTAX_GREP							\
+  (RE_BK_PLUS_QM              | RE_CHAR_CLASSES				\
+   | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS				\
+   | RE_NEWLINE_ALT)
+
+#define RE_SYNTAX_EGREP							\
+  (RE_CHAR_CLASSES        | RE_CONTEXT_INDEP_ANCHORS			\
+   | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE			\
+   | RE_NEWLINE_ALT       | RE_NO_BK_PARENS				\
+   | RE_NO_BK_VBAR)
+
+#define RE_SYNTAX_POSIX_EGREP						\
+  (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES			\
+   | RE_INVALID_INTERVAL_ORD)
+
+/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff.  */
+#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
+
+#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
+
+/* Syntax bits common to both basic and extended POSIX regex syntax.  */
+#define _RE_SYNTAX_POSIX_COMMON						\
+  (RE_CHAR_CLASSES | RE_DOT_NEWLINE      | RE_DOT_NOT_NULL		\
+   | RE_INTERVALS  | RE_NO_EMPTY_RANGES)
+
+#define RE_SYNTAX_POSIX_BASIC						\
+  (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM | RE_CONTEXT_INVALID_DUP)
+
+/* Differs from ..._POSIX_BASIC in that RE_BK_PLUS_QM becomes RE_LIMITED_OPS
+   (i.e., \? \+ \| are not recognized) and RE_CONTEXT_INVALID_DUP is dropped.
+   This isn't truly minimal: other operators, such as \`, aren't disabled.  */
+#define RE_SYNTAX_POSIX_MINIMAL_BASIC					\
+  (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
+
+#define RE_SYNTAX_POSIX_EXTENDED					\
+  (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS			\
+   | RE_CONTEXT_INDEP_OPS   | RE_NO_BK_BRACES				\
+   | RE_NO_BK_PARENS        | RE_NO_BK_VBAR				\
+   | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD)
+
+/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is
+   removed and RE_NO_BK_REFS is added.  */
+#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED				\
+  (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS			\
+   | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES				\
+   | RE_NO_BK_PARENS        | RE_NO_BK_REFS				\
+   | RE_NO_BK_VBAR	    | RE_UNMATCHED_RIGHT_PAREN_ORD)
+/* [[[end syntaxes]]] */
+
+/* Maximum number of duplicates an interval can allow.  Some systems
+   (erroneously) define this in other header files, but we want our
+   value, so remove any previous define.  */
+#ifdef RE_DUP_MAX
+# undef RE_DUP_MAX
+#endif
+/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows.  */
+#define RE_DUP_MAX (0x7fff)
+
+
+/* POSIX `cflags' bits (i.e., information for `regcomp').  */
+
+/* If this bit is set, then use extended regular expression syntax.
+   If not set, then use basic regular expression syntax.  */
+#define REG_EXTENDED 1
+
+/* If this bit is set, then ignore case when matching.
+   If not set, then case is significant.  */
+#define REG_ICASE (REG_EXTENDED << 1)
+
+/* If this bit is set, then ^ and $ also match at newline characters
+     within the string, and . and [^...] never match a newline.
+   If not set, a newline in the string is an ordinary character.  */
+#define REG_NEWLINE (REG_ICASE << 1)
+
+/* If this bit is set, then report only success or fail in regexec.
+   If not set, then returns differ between not matching and errors.  */
+#define REG_NOSUB (REG_NEWLINE << 1)
+
+
+/* POSIX `eflags' bits (i.e., information for regexec).  */
+
+/* If this bit is set, then the beginning-of-line operator doesn't match
+     the beginning of the string (presumably because it's not the
+     beginning of a line).
+   If not set, then the beginning-of-line operator does match the
+     beginning of the string.  */
+#define REG_NOTBOL 1
+
+/* Like REG_NOTBOL, except for the end-of-line.  */
+#define REG_NOTEOL (1 << 1)
+
+/* Use PMATCH[0] to delimit the start and end of the search in the
+   buffer.  */
+#define REG_STARTEND (1 << 2)
+
+
+/* Error codes reported by the regex routines.  If any are removed, changed,
+   or added, update the `re_error_msg' table in regex.c.  */
+typedef enum
+{
+#ifdef _XOPEN_SOURCE
+  REG_ENOSYS = -1,	/* This will never happen for this implementation.  */
+#endif
+
+  REG_NOERROR = 0,	/* Success.  */
+  REG_NOMATCH,		/* Didn't find a match (for regexec).  */
+
+  /* POSIX regcomp return error codes.  (In the order listed in the
+     standard.)  */
+  REG_BADPAT,		/* Invalid pattern.  */
+  REG_ECOLLATE,		/* Invalid collating element.  */
+  REG_ECTYPE,		/* Invalid character class name.  */
+  REG_EESCAPE,		/* Trailing backslash.  */
+  REG_ESUBREG,		/* Invalid back reference.  */
+  REG_EBRACK,		/* Unmatched left bracket.  */
+  REG_EPAREN,		/* Parenthesis imbalance.  */
+  REG_EBRACE,		/* Unmatched \{.  */
+  REG_BADBR,		/* Invalid contents of \{\}.  */
+  REG_ERANGE,		/* Invalid range end.  */
+  REG_ESPACE,		/* Ran out of memory.  */
+  REG_BADRPT,		/* No preceding re for repetition op.  */
+
+  /* Error codes we've added (not part of the POSIX list above).  */
+  REG_EEND,		/* Premature end.  */
+  REG_ESIZE,		/* Compiled pattern bigger than 2^16 bytes.  */
+  REG_ERPAREN		/* Unmatched ) or \); not returned from regcomp.  */
+} reg_errcode_t;
+
+/* This data structure represents a compiled pattern.  Before calling
+   the pattern compiler, the fields `buffer', `allocated', `fastmap',
+   `translate', and `no_sub' can be set.  After the pattern has been
+   compiled, the `re_nsub' field is available.  All other fields are
+   private to the regex routines.  */
+/* Type of the `translate' field; may be overridden before inclusion.  */
+#ifndef RE_TRANSLATE_TYPE
+# define RE_TRANSLATE_TYPE unsigned char *
+#endif
+
+struct re_pattern_buffer
+{
+  /* Space that holds the compiled pattern.  It is declared as
+     `unsigned char *' because its elements are sometimes used as
+     array indexes.  */
+  unsigned char *buffer;
+
+  /* Number of bytes to which `buffer' points.  */
+  unsigned long int allocated;
+
+  /* Number of bytes actually used in `buffer'.  */
+  unsigned long int used;
+
+  /* Syntax setting with which the pattern was compiled.  */
+  reg_syntax_t syntax;
+
+  /* Pointer to a fastmap, if any, otherwise zero.  re_search uses the
+     fastmap, if there is one, to skip over impossible starting points
+     for matches.  */
+  char *fastmap;
+
+  /* Either a translate table to apply to all characters before
+     comparing them, or zero for no translation.  The translation is
+     applied to a pattern when it is compiled and to a string when it
+     is matched.  */
+  RE_TRANSLATE_TYPE translate;
+
+  /* Number of subexpressions found by the compiler.  */
+  size_t re_nsub;
+
+  /* Zero if this pattern cannot match the empty string, one else.
+     Well, in truth it's used only in `re_search_2', to see whether or
+     not we should use the fastmap, so we don't set this absolutely
+     perfectly; see `re_compile_fastmap' (the `duplicate' case).  */
+  unsigned can_be_null : 1;
+
+  /* If REGS_UNALLOCATED, allocate space in the `regs' structure
+     for `max (RE_NREGS, re_nsub + 1)' groups.
+     If REGS_REALLOCATE, reallocate space if necessary.
+     If REGS_FIXED, use what's there.  */
+#define REGS_UNALLOCATED 0
+#define REGS_REALLOCATE 1
+#define REGS_FIXED 2
+  unsigned regs_allocated : 2;
+
+  /* Set to zero when `regex_compile' compiles a pattern; set to one
+     by `re_compile_fastmap' if it updates the fastmap.  */
+  unsigned fastmap_accurate : 1;
+
+  /* If set, `re_match_2' does not return information about
+     subexpressions.  */
+  unsigned no_sub : 1;
+
+  /* If set, a beginning-of-line anchor doesn't match at the beginning
+     of the string.  */
+  unsigned not_bol : 1;
+
+  /* Similarly for an end-of-line anchor.  */
+  unsigned not_eol : 1;
+
+  /* If true, an anchor at a newline matches.  */
+  unsigned newline_anchor : 1;
+};
+/* Name under which the POSIX entry points (regcomp etc.) take the buffer.  */
+typedef struct re_pattern_buffer regex_t;
+
+/* Type for byte offsets within the string.  POSIX mandates this.  */
+typedef int regoff_t;
+
+
+/* This is the structure we store register match data in.  See
+   regex.texinfo for a full description of what registers match.  */
+struct re_registers
+{
+  unsigned num_regs;	/* Number of registers held in `start' and `end'.  */
+  regoff_t *start;	/* Array of start offsets, one per register.  */
+  regoff_t *end;	/* Array of end offsets, one per register.  */
+};
+
+
+/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
+   `re_match_2' returns information about at least this many registers
+   the first time a `regs' structure is passed.  */
+#ifndef RE_NREGS
+# define RE_NREGS 30
+#endif
+
+
+/* POSIX specification for registers.  Aside from using different names
+   than `re_registers', POSIX uses an array of structures, instead of a
+   structure of arrays.  */
+typedef struct
+{
+  regoff_t rm_so;  /* Byte offset from string's start to substring's start.  */
+  regoff_t rm_eo;  /* Byte offset from string's start to substring's end.  */
+} regmatch_t;
+
+/* Declarations for routines.  */
+
+/* Set the current default syntax to SYNTAX, and return the old syntax.
+   You can also simply assign to the `re_syntax_options' variable.  */
+extern reg_syntax_t re_set_syntax (reg_syntax_t __syntax);
+
+/* Compile the regular expression PATTERN, with length LENGTH
+   and syntax given by the global `re_syntax_options', into the buffer
+   BUFFER.  Return NULL if successful, and an error string if not.  */
+extern const char *re_compile_pattern (const char *__pattern, size_t __length,
+				       struct re_pattern_buffer *__buffer);
+
+
+/* Compile a fastmap for the compiled pattern in BUFFER; used to
+   accelerate searches.  Return 0 if successful and -2 if there was an
+   internal error.  */
+extern int re_compile_fastmap (struct re_pattern_buffer *__buffer);
+
+
+/* Search in the string STRING (with length LENGTH) for the pattern
+   compiled into BUFFER.  Start searching at position START, for RANGE
+   characters.  Return the starting position of the match, -1 for no
+   match, or -2 for an internal error.  Also return register
+   information in REGS (if REGS is nonzero and BUFFER->no_sub is zero).  */
+extern int re_search (struct re_pattern_buffer *__buffer, const char *__string,
+		      int __length, int __start, int __range,
+		      struct re_registers *__regs);
+
+
+/* Like `re_search', but search in the concatenation of STRING1 and
+   STRING2.  Also, stop searching at index START + STOP.  */
+extern int re_search_2 (struct re_pattern_buffer *__buffer,
+			const char *__string1, int __length1,
+			const char *__string2, int __length2, int __start,
+			int __range, struct re_registers *__regs, int __stop);
+
+
+/* Like `re_search', but return how many characters in STRING the regexp
+   in BUFFER matched, starting at position START.  */
+extern int re_match (struct re_pattern_buffer *__buffer, const char *__string,
+		     int __length, int __start, struct re_registers *__regs);
+
+
+/* Relates to `re_match' as `re_search_2' relates to `re_search'.  */
+extern int re_match_2 (struct re_pattern_buffer *__buffer,
+		       const char *__string1, int __length1,
+		       const char *__string2, int __length2, int __start,
+		       struct re_registers *__regs, int __stop);
+
+
+/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
+   ENDS.  Subsequent matches using BUFFER and REGS will use this memory
+   for recording register information.  STARTS and ENDS must be
+   allocated with malloc, and must each be at least `NUM_REGS * sizeof
+   (regoff_t)' bytes long.
+
+   If NUM_REGS == 0, then subsequent matches should allocate their own
+   register data.
+
+   Unless this function is called, the first search or match using
+   BUFFER will allocate its own register data, without
+   freeing the old data.  */
+extern void re_set_registers (struct re_pattern_buffer *__buffer,
+			      struct re_registers *__regs,
+			      unsigned int __num_regs,
+			      regoff_t *__starts, regoff_t *__ends);
+
+#if defined _REGEX_RE_COMP || defined _LIBC
+# ifndef _CRAY
+/* 4.2 bsd compatibility.  */
+extern char *re_comp (const char *);
+extern int re_exec (const char *);
+# endif
+#endif
+
+/* GCC 2.95 and later have "__restrict"; C99 compilers have
+   "restrict", and "configure" may have defined "restrict".  */
+#ifndef __restrict
+# if ! (2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__))
+#  if defined restrict || 199901L <= __STDC_VERSION__
+#   define __restrict restrict
+#  else
+#   define __restrict
+#  endif
+# endif
+#endif
+/* gcc 3.1 and up support the [restrict] syntax.  */
+#ifndef __restrict_arr
+# if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) \
+     && !defined __GNUG__
+#  define __restrict_arr __restrict
+# else
+#  define __restrict_arr
+# endif
+#endif
+
+/* POSIX compatibility.  __cflags is a mask of the `cflags' bits above.  */
+extern int regcomp (regex_t *__restrict __preg,
+		    const char *__restrict __pattern,
+		    int __cflags);
+/* __eflags is a mask of the `eflags' bits above (REG_NOTBOL etc.).  */
+extern int regexec (const regex_t *__restrict __preg,
+		    const char *__restrict __string, size_t __nmatch,
+		    regmatch_t __pmatch[__restrict_arr],
+		    int __eflags);
+/* __errcode is presumably a reg_errcode_t value -- TODO confirm in regex.c.  */
+extern size_t regerror (int __errcode, const regex_t *__restrict __preg,
+			char *__restrict __errbuf, size_t __errbuf_size);
+
+extern void regfree (regex_t *__preg);
+
+
+#ifdef __cplusplus
+}
+#endif	/* C++ */
+
+#endif /* regex.h */
diff --git a/graph.c b/graph.c
new file mode 100644
index 0000000..fa40f07
--- /dev/null
+++ b/graph.c
@@ -0,0 +1,1940 @@
+/*!
+ * \file 
+ *
+ * \brief Various routines with dealing with sparse graphs 
+ *
+ * \author George Karypis
+ * \version\verbatim $Id: graph.c 22415 2019-09-05 16:55:00Z karypis $ \endverbatim
+ */
+
+#include <GKlib.h>
+
+#define OMPMINOPS       50000
+
+/*************************************************************************/
+/*! Creates a new graph object with gk_malloc() and hands it to
+    gk_graph_Init(), which zeroes every field and sets nvtxs to -1.
+    \returns the newly allocated and initialized graph.
+*/
+/**************************************************************************/
+gk_graph_t *gk_graph_Create()
+{
+  gk_graph_t *newgraph = (gk_graph_t *)gk_malloc(sizeof(gk_graph_t),
+                                                 "gk_graph_Create: graph");
+
+  gk_graph_Init(newgraph);
+
+  return newgraph;
+}
+
+
+/*************************************************************************/
+/*! Resets a graph to its empty state: all bytes zeroed, nvtxs set to -1.
+    \param graph is the graph to be initialized.
+*/
+/*************************************************************************/
+void gk_graph_Init(gk_graph_t *graph)
+{
+  memset(graph, 0, sizeof(*graph));
+  graph->nvtxs = -1;
+}
+
+
+/*************************************************************************/
+/*! Frees a graph (contents, then the structure); NULL inputs are ignored.
+    \param graph is the address of the graph pointer to be freed.
+*/
+/*************************************************************************/
+void gk_graph_Free(gk_graph_t **graph)
+{
+  if (graph == NULL || *graph == NULL)  /* nothing to release */
+    return;
+  gk_graph_FreeContents(*graph);
+  gk_free((void **)graph, LTERM);
+}
+
+
+/*************************************************************************/
+/*! Releases the arrays owned by the graph with a single gk_free() call;
+    gk_free() is expected to leave those fields NULL (TODO confirm).
+    \param graph is the graph whose contents will be freed.
+*/
+/*************************************************************************/
+void gk_graph_FreeContents(gk_graph_t *graph)
+{
+  gk_free((void **)&graph->xadj,
+          &graph->adjncy,
+          &graph->iadjwgt, &graph->fadjwgt,   /* edge weights */
+          &graph->ivwgts,  &graph->fvwgts,    /* vertex weights */
+          &graph->ivsizes, &graph->fvsizes,   /* vertex sizes */
+          &graph->vlabels, LTERM);
+}
+
+
+/**************************************************************************/
+/*! Reads a sparse graph from the supplied file 
+    \param filename is the file that stores the data.
+    \param format is the graph format. The supported values are:
+           GK_GRAPH_FMT_METIS, GK_GRAPH_FMT_IJV, GK_GRAPH_FMT_HIJV.
+    \param hasvals is 1 if the input file has values
+    \param numbering is 1 if the input file numbering starts from one
+    \param isfewgts is 1 if the edge-weights should be read as floats
+    \param isfvwgts is 1 if the vertex-weights should be read as floats
+    \param isfvsizes is 1 if the vertex-sizes should be read as floats
+    \returns the graph that was read.
+*/
+/**************************************************************************/
+gk_graph_t *gk_graph_Read(char *filename, int format, int hasvals, 
+                int numbering, int isfewgts, int isfvwgts, int isfvsizes)
+{
+  ssize_t i, k, l;
+  size_t nfields, nvtxs, nedges, fmt, ncon, lnlen;
+  ssize_t *xadj;
+  int32_t ival, *iinds=NULL, *jinds=NULL, *ivals=NULL, *adjncy, *iadjwgt;
+  float fval, *fvals=NULL, *fadjwgt;
+  int readsizes=0, readwgts=0, readvals=0;
+  char *line=NULL, *head, *tail, fmtstr[256];
+  FILE *fpin=NULL;
+  gk_graph_t *graph=NULL;
+
+
+  if (!gk_fexists(filename)) 
+    gk_errexit(SIGERR, "File %s does not exist!\n", filename);
+
+  switch (format) {
+    case GK_GRAPH_FMT_METIS:
+      fpin = gk_fopen(filename, "r", "gk_graph_Read: fpin");
+      do {
+        if (gk_getline(&line, &lnlen, fpin) <= 0)
+          gk_errexit(SIGERR, "Premature end of input file: file:%s\n", filename);
+      } while (line[0] == '%');
+
+      fmt = ncon = 0;
+      nfields = sscanf(line, "%zu %zu %zu %zu", &nvtxs, &nedges, &fmt, &ncon);
+      if (nfields < 2)
+        gk_errexit(SIGERR, "Header line must contain at least 2 integers (#vtxs and #edges).\n");
+
+      nedges *= 2;
+
+      if (fmt > 111)
+        gk_errexit(SIGERR, "Cannot read this type of file format [fmt=%zu]!\n", fmt);
+
+      sprintf(fmtstr, "%03zu", fmt%1000);
+      readsizes = (fmtstr[0] == '1');
+      readwgts  = (fmtstr[1] == '1');
+      readvals  = (fmtstr[2] == '1');
+      numbering = 1;
+      ncon      = (ncon == 0 ? 1 : ncon);
+
+      graph = gk_graph_Create();
+    
+      graph->nvtxs = nvtxs;
+    
+      graph->xadj   = gk_zmalloc(nvtxs+1, "gk_graph_Read: xadj");
+      graph->adjncy = gk_i32malloc(nedges, "gk_graph_Read: adjncy");
+      if (readvals) {
+        if (isfewgts)
+          graph->fadjwgt = gk_fmalloc(nedges, "gk_graph_Read: fadjwgt");
+        else
+          graph->iadjwgt = gk_i32malloc(nedges, "gk_graph_Read: iadjwgt");
+      }
+    
+      if (readsizes) {
+        if (isfvsizes)
+          graph->fvsizes = gk_fmalloc(nvtxs, "gk_graph_Read: fvsizes");
+        else
+          graph->ivsizes = gk_i32malloc(nvtxs, "gk_graph_Read: ivsizes");
+      }
+    
+      if (readwgts) {
+        if (isfvwgts)
+          graph->fvwgts = gk_fmalloc(nvtxs*ncon, "gk_graph_Read: fvwgts");
+        else
+          graph->ivwgts = gk_i32malloc(nvtxs*ncon, "gk_graph_Read: ivwgts");
+      }
+    
+    
+      /*----------------------------------------------------------------------
+       * Read the sparse graph file
+       *---------------------------------------------------------------------*/
+      numbering = (numbering ? - 1 : 0);
+      for (graph->xadj[0]=0, k=0, i=0; i<nvtxs; i++) {
+        do {
+          if (gk_getline(&line, &lnlen, fpin) == -1)
+            gk_errexit(SIGERR, "Pregraphure end of input file: file while reading row %d\n", i);
+        } while (line[0] == '%');
+    
+        head = line;
+        tail = NULL;
+    
+        /* Read vertex sizes */
+        if (readsizes) {
+          if (isfvsizes) {
+#ifdef __MSC__
+            graph->fvsizes[i] = (float)strtod(head, &tail);
+#else
+            graph->fvsizes[i] = strtof(head, &tail);
+#endif
+            if (tail == head)
+              gk_errexit(SIGERR, "The line for vertex %zd does not have size information\n", i+1);
+            if (graph->fvsizes[i] < 0)
+              gk_errexit(SIGERR, "The size for vertex %zd must be >= 0\n", i+1);
+          }
+          else {
+            graph->ivsizes[i] = strtol(head, &tail, 0);
+            if (tail == head)
+              gk_errexit(SIGERR, "The line for vertex %zd does not have size information\n", i+1);
+            if (graph->ivsizes[i] < 0)
+              gk_errexit(SIGERR, "The size for vertex %zd must be >= 0\n", i+1);
+          }
+          head = tail;
+        }
+    
+        /* Read vertex weights */
+        if (readwgts) {
+          for (l=0; l<ncon; l++) {
+            if (isfvwgts) {
+#ifdef __MSC__
+              graph->fvwgts[i*ncon+l] = (float)strtod(head, &tail);
+#else
+              graph->fvwgts[i*ncon+l] = strtof(head, &tail);
+#endif
+              if (tail == head)
+                gk_errexit(SIGERR, "The line for vertex %zd does not have enough weights "
+                        "for the %d constraints.\n", i+1, ncon);
+              if (graph->fvwgts[i*ncon+l] < 0)
+                gk_errexit(SIGERR, "The weight vertex %zd and constraint %zd must be >= 0\n", i+1, l);
+            }
+            else {
+              graph->ivwgts[i*ncon+l] = strtol(head, &tail, 0);
+              if (tail == head)
+                gk_errexit(SIGERR, "The line for vertex %zd does not have enough weights "
+                        "for the %d constraints.\n", i+1, ncon);
+              if (graph->ivwgts[i*ncon+l] < 0)
+                gk_errexit(SIGERR, "The weight vertex %zd and constraint %zd must be >= 0\n", i+1, l);
+            }
+            head = tail;
+          }
+        }
+    
+       
+        /* Read the rest of the row */
+        while (1) {
+          ival = (int)strtol(head, &tail, 0);
+          if (tail == head) 
+            break;
+          head = tail;
+          
+          if ((graph->adjncy[k] = ival + numbering) < 0)
+            gk_errexit(SIGERR, "Error: Invalid column number %d at row %zd.\n", ival, i);
+    
+          if (readvals) {
+            if (isfewgts) {
+#ifdef __MSC__
+              fval = (float)strtod(head, &tail);
+#else
+        	  fval = strtof(head, &tail);
+#endif
+              if (tail == head)
+                gk_errexit(SIGERR, "Value could not be found for edge! Vertex:%zd, NNZ:%zd\n", i, k);
+    
+              graph->fadjwgt[k] = fval;
+            }
+            else {
+        	  ival = strtol(head, &tail, 0);
+              if (tail == head)
+                gk_errexit(SIGERR, "Value could not be found for edge! Vertex:%zd, NNZ:%zd\n", i, k);
+    
+              graph->iadjwgt[k] = ival;
+            }
+            head = tail;
+          }
+          k++;
+        }
+        graph->xadj[i+1] = k;
+      }
+    
+      if (k != nedges)
+        gk_errexit(SIGERR, "gk_graph_Read: Something wrong with the number of edges in "
+                           "the input file. nedges=%zd, Actualnedges=%zd.\n", nedges, k);
+    
+      gk_fclose(fpin);
+  
+      gk_free((void **)&line, LTERM);
+
+      break;
+
+    case GK_GRAPH_FMT_IJV:
+    case GK_GRAPH_FMT_HIJV:
+      gk_getfilestats(filename, &nvtxs, &nedges, NULL, NULL);
+
+      if (format == GK_GRAPH_FMT_HIJV) { /* remove the #rows/#cols values and row */
+        nedges -= 2; 
+        nvtxs  -= 1;
+      }
+
+      if (hasvals == 1 && 3*nvtxs != nedges)
+        gk_errexit(SIGERR, "Error: The number of numbers (%zd %d) in the input file is not a multiple of 3.\n", nedges, hasvals);
+      if (hasvals == 0 && 2*nvtxs != nedges)
+        gk_errexit(SIGERR, "Error: The number of numbers (%zd %d) in the input file is not a multiple of 2.\n", nedges, hasvals);
+
+      nedges = nvtxs;
+      numbering = (numbering ? -1 : 0);
+
+      /* read the data into three arrays */
+      iinds = gk_i32malloc(nedges, "iinds");
+      jinds = gk_i32malloc(nedges, "jinds");
+      if (hasvals) {
+        if (isfewgts)
+          fvals = gk_fmalloc(nedges, "fvals");
+        else
+          ivals = gk_i32malloc(nedges, "ivals");
+      }
+
+      fpin = gk_fopen(filename, "r", "gk_graph_Read: fpin");
+
+      if (format == GK_GRAPH_FMT_HIJV) { /* read and ignore the #rows/#cols values */
+        if (fscanf(fpin, "%zd %zd", &i, &i) != 2)
+          gk_errexit(SIGERR, "Error: Failed to read the header line.\n");
+      }
+
+      for (nvtxs=0, i=0; i<nedges; i++) {
+        if (hasvals) {
+          if (isfewgts) {
+            if (fscanf(fpin, "%"PRId32" %"PRId32" %f", &iinds[i], &jinds[i], &fvals[i]) != 3)
+              gk_errexit(SIGERR, "Error: Failed to read (i, j, val) for nedge: %zd.\n", i);
+          }
+          else {
+            if (fscanf(fpin, "%"PRId32" %"PRId32" %"PRId32, &iinds[i], &jinds[i], &ivals[i]) != 3)
+              gk_errexit(SIGERR, "Error: Failed to read (i, j, val) for nedge: %zd.\n", i);
+          }
+        }
+        else {
+          if (fscanf(fpin, "%"PRId32" %"PRId32, &iinds[i], &jinds[i]) != 2)
+            gk_errexit(SIGERR, "Error: Failed to read (i, j) value for nedge: %zd.\n", i);
+        }
+        iinds[i] += numbering;
+        jinds[i] += numbering;
+
+        if (nvtxs < iinds[i])
+          nvtxs = iinds[i];
+        if (nvtxs < jinds[i])
+          nvtxs = jinds[i];
+      }
+      gk_fclose(fpin);
+
+      /* convert (i, j, v) into a graph format */
+      graph = gk_graph_Create();
+      graph->nvtxs  = ++nvtxs;
+      xadj   = graph->xadj   = gk_zsmalloc(nvtxs+1, 0, "xadj");
+      adjncy = graph->adjncy = gk_i32malloc(nedges, "adjncy");
+      if (hasvals) {
+        if (isfewgts)
+          fadjwgt = graph->fadjwgt = gk_fmalloc(nedges, "fadjwgt");
+        else
+          iadjwgt = graph->iadjwgt = gk_i32malloc(nedges, "iadjwgt");
+      }
+
+      for (i=0; i<nedges; i++)
+        xadj[iinds[i]]++;
+      MAKECSR(i, nvtxs, xadj);
+
+      for (i=0; i<nedges; i++) {
+        adjncy[xadj[iinds[i]]] = jinds[i];
+        if (hasvals) {
+          if (isfewgts)
+            fadjwgt[xadj[iinds[i]]] = fvals[i];
+          else
+            iadjwgt[xadj[iinds[i]]] = ivals[i];
+        }
+        xadj[iinds[i]]++;
+      }
+      SHIFTCSR(i, nvtxs, xadj);
+
+      gk_free((void **)&iinds, &jinds, &fvals, &ivals, LTERM);
+      break;
+
+    default:
+      gk_errexit(SIGERR, "Unrecognized format: %d\n", format);
+  }
+
+  return graph;
+}
+
+
+/**************************************************************************/
+/*! Writes a graph to a file or to stdout.
+    \param graph is the graph to be written.
+    \param filename is the name of the output file. When it is NULL the
+           graph is written to stdout instead.
+    \param format selects the output layout; GK_GRAPH_FMT_METIS and
+           GK_GRAPH_FMT_IJV are handled, any other value is reported
+           through gk_errexit().
+    \param numbering is added to both vertex indices of every edge written
+           in the IJV format (0 or 1 for 0- or 1-based output). The METIS
+           format ignores it and always writes neighbor indices plus one.
+*/
+/**************************************************************************/
+void gk_graph_Write(gk_graph_t *graph, char *filename, int format, int numbering)
+{
+  int32_t vtx;
+  ssize_t e;
+  ssize_t *xadj   = graph->xadj;
+  int32_t *adjncy = graph->adjncy;
+  int hasvsizes, hasvwgts, hasewgts;
+  FILE *fpout = stdout;
+
+  /* a NULL filename means "write to stdout" */
+  if (filename != NULL)
+    fpout = gk_fopen(filename, "w", "gk_graph_Write: fpout");
+
+  /* an attribute is present if either its integer or its float array exists */
+  hasvsizes = (graph->ivsizes != NULL || graph->fvsizes != NULL);
+  hasvwgts  = (graph->ivwgts  != NULL || graph->fvwgts  != NULL);
+  hasewgts  = (graph->iadjwgt != NULL || graph->fadjwgt != NULL);
+
+  if (format == GK_GRAPH_FMT_METIS) {
+    /* header line: #vertices, half the number of adjacency entries and,
+       when any attribute is present, the three-digit flag field */
+    fprintf(fpout, "%d %zd", graph->nvtxs, xadj[graph->nvtxs]/2);
+    if (hasvwgts || hasvsizes || hasewgts)
+      fprintf(fpout, " %d%d%d", hasvsizes, hasvwgts, hasewgts);
+    fprintf(fpout, "\n");
+
+    /* one line per vertex: [size] [weight] {neighbor [edge-weight]}* */
+    for (vtx=0; vtx<graph->nvtxs; vtx++) {
+      if (hasvsizes && graph->ivsizes)
+        fprintf(fpout, " %d", graph->ivsizes[vtx]);
+      else if (hasvsizes)
+        fprintf(fpout, " %f", graph->fvsizes[vtx]);
+
+      if (hasvwgts && graph->ivwgts)
+        fprintf(fpout, " %d", graph->ivwgts[vtx]);
+      else if (hasvwgts)
+        fprintf(fpout, " %f", graph->fvwgts[vtx]);
+
+      for (e=xadj[vtx]; e<xadj[vtx+1]; e++) {
+        fprintf(fpout, " %d", adjncy[e]+1);
+        if (!hasewgts)
+          continue;
+
+        /* the integer weights take precedence over the float ones */
+        if (graph->iadjwgt)
+          fprintf(fpout, " %d", graph->iadjwgt[e]);
+        else
+          fprintf(fpout, " %f", graph->fadjwgt[e]);
+      }
+      fprintf(fpout, "\n");
+    }
+  }
+  else if (format == GK_GRAPH_FMT_IJV) {
+    /* one "i j value" line per stored edge; unweighted edges get value 1 */
+    for (vtx=0; vtx<graph->nvtxs; vtx++) {
+      for (e=xadj[vtx]; e<xadj[vtx+1]; e++) {
+        fprintf(fpout, "%d %d ", vtx+numbering, adjncy[e]+numbering);
+        if (!hasewgts)
+          fprintf(fpout, " 1\n");
+        else if (graph->iadjwgt)
+          fprintf(fpout, " %d\n", graph->iadjwgt[e]);
+        else
+          fprintf(fpout, " %f\n", graph->fadjwgt[e]);
+      }
+    }
+  }
+  else {
+    gk_errexit(SIGERR, "Unknown file format. %d\n", format);
+  }
+
+  /* only close streams that were opened here; stdout is left alone */
+  if (filename != NULL)
+    gk_fclose(fpout);
+}
+
+
+/*************************************************************************/
+/*! Creates a deep copy of a graph.
+    Every array that is present (non-NULL) in the input graph is copied
+    into freshly allocated storage; arrays that are absent are not
+    assigned on the copy.
+    \param graph is the graph to be duplicated.
+    \returns the newly created copy of the graph.
+*/
+/**************************************************************************/
+gk_graph_t *gk_graph_Dup(gk_graph_t *graph)
+{
+  gk_graph_t *copy;
+  ssize_t *zbuf;
+  int32_t *ibuf;
+  float *fbuf;
+
+  copy = gk_graph_Create();
+  copy->nvtxs = graph->nvtxs;
+
+  /* adjacency pointers: nvtxs+1 entries */
+  if (graph->xadj) {
+    zbuf = gk_zmalloc(copy->nvtxs+1, "gk_graph_Dup: xadj");
+    copy->xadj = gk_zcopy(copy->nvtxs+1, graph->xadj, zbuf);
+  }
+
+  /* per-vertex integer attributes: nvtxs entries each */
+  if (graph->ivwgts) {
+    ibuf = gk_i32malloc(copy->nvtxs, "gk_graph_Dup: ivwgts");
+    copy->ivwgts = gk_i32copy(copy->nvtxs, graph->ivwgts, ibuf);
+  }
+  if (graph->ivsizes) {
+    ibuf = gk_i32malloc(copy->nvtxs, "gk_graph_Dup: ivsizes");
+    copy->ivsizes = gk_i32copy(copy->nvtxs, graph->ivsizes, ibuf);
+  }
+  if (graph->vlabels) {
+    ibuf = gk_i32malloc(copy->nvtxs, "gk_graph_Dup: ivlabels");
+    copy->vlabels = gk_i32copy(copy->nvtxs, graph->vlabels, ibuf);
+  }
+
+  /* per-vertex float attributes: nvtxs entries each */
+  if (graph->fvwgts) {
+    fbuf = gk_fmalloc(copy->nvtxs, "gk_graph_Dup: fvwgts");
+    copy->fvwgts = gk_fcopy(copy->nvtxs, graph->fvwgts, fbuf);
+  }
+  if (graph->fvsizes) {
+    fbuf = gk_fmalloc(copy->nvtxs, "gk_graph_Dup: fvsizes");
+    copy->fvsizes = gk_fcopy(copy->nvtxs, graph->fvsizes, fbuf);
+  }
+
+  /* per-edge arrays: xadj[nvtxs] entries each. The edge count is read
+     from xadj only inside these branches, i.e., only when an edge array
+     actually exists. */
+  if (graph->adjncy) {
+    ibuf = gk_i32malloc(graph->xadj[copy->nvtxs], "gk_graph_Dup: adjncy");
+    copy->adjncy = gk_i32copy(graph->xadj[copy->nvtxs], graph->adjncy, ibuf);
+  }
+  if (graph->iadjwgt) {
+    ibuf = gk_i32malloc(graph->xadj[copy->nvtxs], "gk_graph_Dup: iadjwgt");
+    copy->iadjwgt = gk_i32copy(graph->xadj[copy->nvtxs], graph->iadjwgt, ibuf);
+  }
+  if (graph->fadjwgt) {
+    fbuf = gk_fmalloc(graph->xadj[copy->nvtxs], "gk_graph_Dup: fadjwgt");
+    copy->fadjwgt = gk_fcopy(graph->xadj[copy->nvtxs], graph->fadjwgt, fbuf);
+  }
+
+  return copy;
+}
+
+
+/*************************************************************************/
+/*! Returns the transpose of a graph.
+    Every stored edge (vi -> vj) of the input becomes the edge (vj -> vi)
+    of the result and carries its integer/float edge weight along. The
+    per-vertex attributes are copied unchanged.
+    \param graph is the graph to be transposed.
+    \returns the newly created transposed graph.
+*/
+/**************************************************************************/
+gk_graph_t *gk_graph_Transpose(gk_graph_t *graph)
+{
+  int32_t src, dst;
+  ssize_t e, slot;
+  ssize_t *txadj;
+  int32_t *ibuf;
+  float *fbuf;
+  gk_graph_t *tgraph;
+
+  tgraph = gk_graph_Create();
+
+  tgraph->nvtxs  = graph->nvtxs;
+  tgraph->xadj   = txadj = gk_zsmalloc(graph->nvtxs+1, 0, "gk_graph_Transpose: xadj");
+  tgraph->adjncy = gk_i32malloc(graph->xadj[graph->nvtxs], "gk_graph_Transpose: adjncy");
+
+  if (graph->iadjwgt)
+    tgraph->iadjwgt = gk_i32malloc(graph->xadj[graph->nvtxs], "gk_graph_Transpose: iadjwgt");
+  if (graph->fadjwgt)
+    tgraph->fadjwgt = gk_fmalloc(graph->xadj[graph->nvtxs], "gk_graph_Transpose: fadjwgt");
+
+  /* pass 1: count how many edges point to each vertex, then turn the
+     counts into CSR start offsets */
+  for (src=0; src<graph->nvtxs; src++) {
+    for (e=graph->xadj[src]; e<graph->xadj[src+1]; e++)
+      txadj[graph->adjncy[e]] += 1;
+  }
+  MAKECSR(src, tgraph->nvtxs, txadj);
+
+  /* pass 2: scatter every edge into the row of its head vertex; txadj[dst]
+     acts as the insertion cursor of row dst and is advanced per insertion */
+  for (src=0; src<graph->nvtxs; src++) {
+    for (e=graph->xadj[src]; e<graph->xadj[src+1]; e++) {
+      dst  = graph->adjncy[e];
+      slot = txadj[dst]++;
+
+      tgraph->adjncy[slot] = src;
+      if (tgraph->iadjwgt)
+        tgraph->iadjwgt[slot] = graph->iadjwgt[e];
+      if (tgraph->fadjwgt)
+        tgraph->fadjwgt[slot] = graph->fadjwgt[e];
+    }
+  }
+  /* the cursors now sit at the row ends; shift them back to row starts */
+  SHIFTCSR(src, tgraph->nvtxs, txadj);
+
+  /* copy vertex attributes */
+  if (graph->ivwgts) {
+    ibuf = gk_i32malloc(graph->nvtxs, "gk_graph_Transpose: ivwgts");
+    tgraph->ivwgts = gk_i32copy(graph->nvtxs, graph->ivwgts, ibuf);
+  }
+  if (graph->ivsizes) {
+    ibuf = gk_i32malloc(graph->nvtxs, "gk_graph_Transpose: ivsizes");
+    tgraph->ivsizes = gk_i32copy(graph->nvtxs, graph->ivsizes, ibuf);
+  }
+  if (graph->vlabels) {
+    ibuf = gk_i32malloc(graph->nvtxs, "gk_graph_Transpose: ivlabels");
+    tgraph->vlabels = gk_i32copy(graph->nvtxs, graph->vlabels, ibuf);
+  }
+  if (graph->fvwgts) {
+    fbuf = gk_fmalloc(graph->nvtxs, "gk_graph_Transpose: fvwgts");
+    tgraph->fvwgts = gk_fcopy(graph->nvtxs, graph->fvwgts, fbuf);
+  }
+  if (graph->fvsizes) {
+    fbuf = gk_fmalloc(graph->nvtxs, "gk_graph_Transpose: fvsizes");
+    tgraph->fvsizes = gk_fcopy(graph->nvtxs, graph->fvsizes, fbuf);
+  }
+
+  return tgraph;
+}
+
+
+/*************************************************************************/
+/*! Returns a subgraph containing a set of consecutive vertices.
+    The rows vstart..vstart+nvtxs-1 of the graph are copied together with
+    their per-vertex attributes and their adjacency/edge-weight slices.
+    The copied adjacency entries are taken verbatim from the original
+    graph, i.e., neighbor IDs are neither renumbered nor filtered.
+
+    \param graph is the original graph.
+    \param vstart is the starting vertex.
+    \param nvtxs is the number of vertices from vstart to extract.
+    \returns the newly created subgraph, or NULL if vstart or nvtxs is
+             negative or if the requested range extends past the last
+             vertex of the graph.
+*/
+/**************************************************************************/
+gk_graph_t *gk_graph_ExtractSubgraph(gk_graph_t *graph, int vstart, int nvtxs)
+{
+  ssize_t i, estart=0, nedges=0;
+  gk_graph_t *ngraph;
+
+  /* Reject ranges that are not fully inside [0, graph->nvtxs]. The upper
+     bound is tested with a subtraction so that vstart+nvtxs cannot
+     overflow, and negative values are refused because they would index
+     in front of the arrays. */
+  if (vstart < 0 || nvtxs < 0 || vstart > graph->nvtxs || nvtxs > graph->nvtxs - vstart)
+    return NULL;
+
+  ngraph = gk_graph_Create();
+
+  ngraph->nvtxs  = nvtxs;
+
+  /* copy the adjacency structure */
+  if (graph->xadj) {
+    estart = graph->xadj[vstart];
+    nedges = graph->xadj[vstart+nvtxs] - estart;
+
+    ngraph->xadj = gk_zcopy(nvtxs+1, graph->xadj+vstart, 
+                              gk_zmalloc(nvtxs+1, "gk_graph_ExtractSubgraph: xadj"));
+
+    /* rebase the offsets so that the first row starts at 0; walking
+       backwards keeps xadj[0] intact until every other entry used it */
+    for (i=nvtxs; i>=0; i--)
+      ngraph->xadj[i] -= ngraph->xadj[0];
+    ASSERT(ngraph->xadj[0] == 0);
+    ASSERT(ngraph->xadj[nvtxs] == nedges);
+  }
+
+  /* copy the per-vertex attributes that are present */
+  if (graph->ivwgts)
+    ngraph->ivwgts = gk_i32copy(nvtxs, graph->ivwgts+vstart, 
+                            gk_i32malloc(nvtxs, "gk_graph_ExtractSubgraph: ivwgts"));
+  if (graph->ivsizes)
+    ngraph->ivsizes = gk_i32copy(nvtxs, graph->ivsizes+vstart, 
+                            gk_i32malloc(nvtxs, "gk_graph_ExtractSubgraph: ivsizes"));
+  if (graph->vlabels)
+    ngraph->vlabels = gk_i32copy(nvtxs, graph->vlabels+vstart, 
+                            gk_i32malloc(nvtxs, "gk_graph_ExtractSubgraph: vlabels"));
+
+  if (graph->fvwgts)
+    ngraph->fvwgts = gk_fcopy(nvtxs, graph->fvwgts+vstart, 
+                            gk_fmalloc(nvtxs, "gk_graph_ExtractSubgraph: fvwgts"));
+  if (graph->fvsizes)
+    ngraph->fvsizes = gk_fcopy(nvtxs, graph->fvsizes+vstart, 
+                            gk_fmalloc(nvtxs, "gk_graph_ExtractSubgraph: fvsizes"));
+
+
+  /* The per-edge slices can only be located through xadj. Without it the
+     edge arrays are not copied (the unguarded code dereferenced xadj here
+     regardless). NOTE(review): this relies on gk_graph_Create() handing
+     back a graph whose array pointers start out NULL -- confirm. */
+  if (graph->xadj) {
+    if (graph->adjncy)
+      ngraph->adjncy = gk_i32copy(nedges, graph->adjncy+estart, 
+                              gk_i32malloc(nedges, "gk_graph_ExtractSubgraph: adjncy"));
+    if (graph->iadjwgt)
+      ngraph->iadjwgt = gk_i32copy(nedges, graph->iadjwgt+estart, 
+                              gk_i32malloc(nedges, "gk_graph_ExtractSubgraph: iadjwgt"));
+    if (graph->fadjwgt)
+      ngraph->fadjwgt = gk_fcopy(nedges, graph->fadjwgt+estart, 
+                              gk_fmalloc(nedges, "gk_graph_ExtractSubgraph: fadjwgt"));
+  }
+
+  return ngraph;
+}
+
+
+/*************************************************************************/
+/*! Returns a copy of the graph whose vertices have been renumbered.
+    Vertex u of the input becomes vertex perm[u] of the output, and row v
+    of the output is built from row iperm[v] of the input. Adjacency
+    entries are mapped through perm; vertex and edge attributes follow
+    their vertex/edge.
+
+    \param[IN] graph is the graph to be re-ordered.
+    \param[IN] perm is the new ordering of the graph's vertices
+    \param[IN] iperm is the original ordering of the re-ordered graph's vertices
+    \returns the newly created re-ordered graph, or NULL when both perm and
+             iperm are NULL.
+
+    \note Either perm or iperm can be NULL but not both. The missing one
+          is derived from the other and released before returning.
+*/
+/**************************************************************************/
+gk_graph_t *gk_graph_Reorder(gk_graph_t *graph, int32_t *perm, int32_t *iperm)
+{
+  ssize_t src, dst, *xadj;
+  int i, oldv, newv, nvtxs;
+  int own_perm, own_iperm;
+  int32_t *adjncy;
+  gk_graph_t *ngraph;
+
+  if (perm == NULL && iperm == NULL)
+    return NULL;
+
+  ngraph = gk_graph_Create();
+
+  nvtxs         = graph->nvtxs;
+  ngraph->nvtxs = nvtxs;
+  xadj          = graph->xadj;
+  adjncy        = graph->adjncy;
+
+  /* mirror the set of arrays that exist in the input graph */
+  if (graph->xadj)
+    ngraph->xadj = gk_zmalloc(nvtxs+1, "gk_graph_Reorder: xadj");
+
+  if (graph->ivwgts)
+    ngraph->ivwgts = gk_i32malloc(nvtxs, "gk_graph_Reorder: ivwgts");
+
+  if (graph->ivsizes)
+    ngraph->ivsizes = gk_i32malloc(nvtxs, "gk_graph_Reorder: ivsizes");
+
+  if (graph->vlabels)
+    ngraph->vlabels = gk_i32malloc(nvtxs, "gk_graph_Reorder: ivlabels");
+
+  if (graph->fvwgts)
+    ngraph->fvwgts = gk_fmalloc(nvtxs, "gk_graph_Reorder: fvwgts");
+
+  if (graph->fvsizes)
+    ngraph->fvsizes = gk_fmalloc(nvtxs, "gk_graph_Reorder: fvsizes");
+
+  if (graph->adjncy)
+    ngraph->adjncy = gk_i32malloc(graph->xadj[nvtxs], "gk_graph_Reorder: adjncy");
+
+  if (graph->iadjwgt)
+    ngraph->iadjwgt = gk_i32malloc(graph->xadj[nvtxs], "gk_graph_Reorder: iadjwgt");
+
+  if (graph->fadjwgt)
+    ngraph->fadjwgt = gk_fmalloc(graph->xadj[nvtxs], "gk_graph_Reorder: fadjwgt");
+
+
+  /* derive whichever permutation was not supplied by inverting the other;
+     at most one of the two branches runs since both cannot be NULL here */
+  own_perm = (perm == NULL);
+  if (own_perm) {
+    perm = gk_i32malloc(nvtxs, "gk_graph_Reorder: perm"); 
+    for (i=0; i<nvtxs; i++)
+      perm[iperm[i]] = i;
+  }
+  own_iperm = (iperm == NULL);
+  if (own_iperm) {
+    iperm = gk_i32malloc(nvtxs, "gk_graph_Reorder: iperm"); 
+    for (i=0; i<nvtxs; i++)
+      iperm[perm[i]] = i;
+  }
+
+  /* emit the rows in the new order; dst is the running write position in
+     the edge arrays of the re-ordered graph */
+  dst = 0;
+  ngraph->xadj[0] = 0;
+  for (newv=0; newv<nvtxs; newv++) {
+    oldv = iperm[newv];
+
+    for (src=xadj[oldv]; src<xadj[oldv+1]; src++, dst++) {
+      ngraph->adjncy[dst] = perm[adjncy[src]];
+      if (graph->iadjwgt)
+        ngraph->iadjwgt[dst] = graph->iadjwgt[src];
+      if (graph->fadjwgt)
+        ngraph->fadjwgt[dst] = graph->fadjwgt[src];
+    }
+    ngraph->xadj[newv+1] = dst;
+
+    if (graph->ivwgts)
+      ngraph->ivwgts[newv] = graph->ivwgts[oldv];
+    if (graph->fvwgts)
+      ngraph->fvwgts[newv] = graph->fvwgts[oldv];
+    if (graph->ivsizes)
+      ngraph->ivsizes[newv] = graph->ivsizes[oldv];
+    if (graph->fvsizes)
+      ngraph->fvsizes[newv] = graph->fvsizes[oldv];
+    if (graph->vlabels)
+      ngraph->vlabels[newv] = graph->vlabels[oldv];
+  }
+
+
+  /* release only the permutation that was created here */
+  if (own_perm)
+    gk_free((void **)&perm, LTERM);
+  if (own_iperm)
+    gk_free((void **)&iperm, LTERM);
+
+  return ngraph;
+}
+
+
+/*************************************************************************/
+/*! This function finds the connected components in a graph.
+    Components are grown by a breadth-first expansion over the stored
+    adjacency lists; the vertices of component c end up in
+    cind[cptr[c]] ... cind[cptr[c+1]-1].
+
+    \param graph is the graph structure
+    \param cptr is the ptr structure of the CSR representation of the 
+           components. The length of this vector must be graph->nvtxs+1.
+    \param cind is the indices structure of the CSR representation of 
+           the components. The length of this vector must be graph->nvtxs.
+
+    \returns the number of components that it found.
+
+    \note The cptr and cind parameters can be NULL, in which case only the
+          number of connected components is returned. If cptr is NULL, a
+          caller-supplied cind is ignored and left untouched. If only cind
+          is NULL, cptr is still filled in.
+*/
+/*************************************************************************/
+int gk_graph_FindComponents(gk_graph_t *graph, int32_t *cptr, int32_t *cind)
+{
+  ssize_t i, j, k, nvtxs, first, last, ntodo, ncmps;
+  ssize_t *xadj;
+  int32_t *adjncy, *pos, *todo;
+  int32_t *tmp_cptr=NULL, *tmp_cind=NULL;  /* scratch vectors owned by this call */
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+
+  /* Deal with NULL supplied cptr/cind vectors. A NULL cptr switches both
+     vectors to private scratch storage; a NULL cind alone only needs a
+     private cind (it used to be dereferenced as is). */
+  if (cptr == NULL) {
+    cptr = tmp_cptr = gk_i32malloc(nvtxs+1, "gk_graph_FindComponents: cptr");
+    cind = tmp_cind = gk_i32malloc(nvtxs, "gk_graph_FindComponents: cind");
+  }
+  else if (cind == NULL) {
+    cind = tmp_cind = gk_i32malloc(nvtxs, "gk_graph_FindComponents: cind");
+  }
+
+  /* The list of vertices that have not been touched yet. 
+     The valid entries are from [0..ntodo). */
+  todo = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "gk_graph_FindComponents: todo"));
+
+  /* For a vertex that has not been visited, pos[i] is the position in the
+     todo list that this vertex is stored. 
+     If a vertex has been visited, pos[i] = -1. */
+  pos = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "gk_graph_FindComponents: pos"));
+
+
+  /* Find the connected components */
+  ncmps = -1;
+  ntodo = nvtxs;     /* All vertices have not been visited */
+  first = last = 0;  /* Point to the first and last vertices that have been touched
+                        but not explored. 
+                        These vertices are stored in cind[first]...cind[last-1]. */
+  while (1) {
+    if (first == last) { /* Find another starting vertex */
+      cptr[++ncmps] = first;  /* Mark the end of the current CC */
+
+      if (ntodo > 0) {
+        /* put the first vertex in the todo list as the start of the new CC */
+        k = todo[0];
+        GKASSERT(pos[k] != -1);
+        cind[last++] = k;
+
+        /* Move the last todo entry into slot 0 and only afterwards mark k
+           as visited. When k is the only entry left, todo[0] is still k
+           after the move, so marking it first would reset pos[k] to 0 and
+           a self-loop on k would then re-queue it and overrun cind/todo. */
+        todo[0] = todo[--ntodo];
+        pos[todo[0]] = 0;
+        pos[k] = -1;
+      }
+      else {
+        break;
+      }
+    }
+
+    i = cind[first++];  /* Get the first visited but unexplored vertex */
+
+    for (j=xadj[i]; j<xadj[i+1]; j++) {
+      k = adjncy[j];
+      if (pos[k] != -1) {
+        cind[last++] = k;
+
+        /* Remove k from the todo list and put the last item in the todo
+           list at the position that k was so that the todo list will be
+           consecutive. The pos[] array is updated accordingly to keep track
+           the location of the vertices in the todo[] list. */
+        todo[pos[k]] = todo[--ntodo];
+        pos[todo[pos[k]]] = pos[k];
+        pos[k] = -1;
+      }
+    }
+  }
+  GKASSERT(first == nvtxs);
+
+  /* the scratch pointers are NULL unless they were allocated above */
+  gk_free((void **)&tmp_cptr, &tmp_cind, &pos, &todo, LTERM);
+
+  return (int) ncmps;
+}
+
+
+/*************************************************************************/
+/*! Computes a vertex permutation from a breadth-first traversal that
+    starts at a given vertex. Whenever the traversal runs out of reachable
+    vertices it restarts from the next untouched one, so every vertex gets
+    a position. The ordering can be used to re-order the graph in order to
+    reduce its bandwidth for better cache locality.
+
+    \param[IN]  graph is the graph structure
+    \param[IN]  v is the starting vertex of the BFS
+    \param[OUT] perm[i] stores the ID of vertex i in the re-ordered graph.
+    \param[OUT] iperm[i] stores the ID of the vertex that corresponds to 
+                the ith vertex in the re-ordered graph.
+
+    \note Either r_perm or r_iperm can be NULL, in which case the
+          corresponding array is not returned. Passing NULL for both works
+          but yields nothing. The returned arrays should be freed with
+          gk_free(). If graph->nvtxs <= 0 the function returns without
+          touching r_perm/r_iperm.
+*/
+/*************************************************************************/
+void gk_graph_ComputeBFSOrdering(gk_graph_t *graph, int v, int32_t **r_perm,
+          int32_t **r_iperm)
+{
+  ssize_t e, *xadj;
+  int idx, cur, nbr, nvtxs, head, tail;
+  int32_t *adjncy, *cot, *pos;
+
+  nvtxs = graph->nvtxs;
+  if (nvtxs <= 0)
+    return;
+
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+
+  /* pos[u] is the slot of u inside cot[] while u is still untouched and
+     becomes -1 once u has been visited */
+  pos = gk_i32malloc(nvtxs, "gk_graph_ComputeBFSOrdering: pos");
+  pos = gk_i32incset(nvtxs, 0, pos);
+
+  /* cot = [C]losed | [O]pen | [T]odo, all stored in one array:
+       [0...head)     explored vertices (the iperm[] built so far),
+       [head...tail)  visited but not yet explored vertices,
+       [tail...nvtxs) vertices that have not been touched. */
+  cot = gk_i32malloc(nvtxs, "gk_graph_ComputeBFSOrdering: cot");
+  cot = gk_i32incset(nvtxs, 0, cot);
+
+  /* swap v with vertex 0 so that v leads the todo part */
+  pos[0] = cot[0] = v;
+  pos[v] = cot[v] = 0;
+
+  for (head=0, tail=0; head<nvtxs; ) {
+    if (head == tail) {
+      /* the open list is empty: seed it with the first todo vertex */
+      nbr = cot[tail];
+      ASSERT(pos[nbr] != -1);
+      pos[nbr] = -1;
+      tail++;
+    }
+
+    /* close the next open vertex and scan its neighbors */
+    cur = cot[head++];
+    for (e=xadj[cur]; e<xadj[cur+1]; e++) {
+      nbr = adjncy[e];
+      if (pos[nbr] == -1)
+        continue;  /* already visited */
+
+      /* Exchange nbr with the vertex at the head of the todo part and
+         then grow the open list over that slot. The four statements are
+         order-dependent: the displaced vertex must be relocated before
+         its old slot is overwritten with nbr. */
+      cot[pos[nbr]]  = cot[tail];
+      pos[cot[tail]] = pos[nbr];
+      cot[tail++]    = nbr;
+      pos[nbr]       = -1;
+    }
+  }
+
+  /* hand the requested arrays over to the caller */
+  if (r_perm != NULL) {
+    /* every pos[] entry is -1 by now, so the array is recycled as perm */
+    for (idx=0; idx<nvtxs; idx++)
+      pos[cot[idx]] = idx;
+
+    *r_perm = pos;
+    pos = NULL;
+  }
+
+  if (r_iperm != NULL) {
+    *r_iperm = cot;
+    cot = NULL;
+  }
+
+  /* whatever was not handed over is released here */
+  gk_free((void **)&pos, &cot, LTERM);
+
+}
+
+
+/*************************************************************************/
+/*! This function computes a permutation of the vertices based on a
+    best-first-traversal. It can be used for re-ordering the graph
+    to reduce its bandwidth for better cache locality.
+    All vertices are placed in a priority queue up front and the top of
+    the queue is ordered next; ordering a vertex re-prioritizes its
+    not-yet-ordered neighbors according to type.
+
+    \param[IN]  graph is the graph structure.
+    \param[IN]  v is the starting vertex of the best-first traversal.
+    \param[IN]  type indicates the criteria to use to measure the 'bestness'
+                of a vertex: 1 sets the priority of a touched neighbor to 1,
+                2 uses the number of already ordered neighbors, 3 the sum
+                of the orders of the already ordered neighbors, and 4 the
+                sum of the differences between the current order and those
+                orders. Any other value leaves the priorities unchanged.
+    \param[OUT] perm[i] stores the ID of vertex i in the re-ordered graph.
+    \param[OUT] iperm[i] stores the ID of the vertex that corresponds to 
+                the ith vertex in the re-ordered graph.
+
+    \note Either r_perm or r_iperm (or both) can be NULL, in which case
+          the corresponding array is not returned.
+          The returned arrays should be freed with gk_free().
+          If graph->nvtxs <= 0 the function returns without touching
+          r_perm/r_iperm.
+*/
+/*************************************************************************/
+void gk_graph_ComputeBestFOrdering0(gk_graph_t *graph, int v, int type, 
+          int32_t **r_perm, int32_t **r_iperm)
+{
+  ssize_t j, jj, *xadj;
+  int i, k, u, nvtxs;
+  int32_t *adjncy, *perm, *degrees;
+  gk_i32pq_t *queue;
+
+  if (graph->nvtxs <= 0)
+    return;
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+
+  /* the degree of the vertices in the closed list */
+  degrees = gk_i32smalloc(nvtxs, 0, "gk_graph_ComputeBestFOrdering: degrees");
+
+  /* if perm[i] >= 0, then perm[i] is the order of vertex i; 
+     otherwise perm[i] == -1.
+  */
+  perm = gk_i32smalloc(nvtxs, -1, "gk_graph_ComputeBestFOrdering: perm");
+
+  /* create the queue and put everything in it; the seed gets the only
+     non-zero priority */
+  queue = gk_i32pqCreate(nvtxs);
+  for (i=0; i<nvtxs; i++)
+    gk_i32pqInsert(queue, i, 0);
+  gk_i32pqUpdate(queue, v, 1);
+
+  /* start processing the nodes */
+  for (i=0; i<nvtxs; i++) {
+    if ((v = gk_i32pqGetTop(queue)) == -1) 
+      gk_errexit(SIGERR, "The priority queue got empty ahead of time [i=%d].\n", i);
+    if (perm[v] != -1)
+      gk_errexit(SIGERR, "The perm[%d] has already been set.\n", v);
+    perm[v] = i;
+
+
+    for (j=xadj[v]; j<xadj[v+1]; j++) {
+      u = adjncy[j];
+      if (perm[u] == -1) {
+        degrees[u]++;
+
+        switch (type) {
+          case 1: /* DFS */
+            gk_i32pqUpdate(queue, u, 1);
+            break;
+          case 2: /* Max in closed degree */
+            gk_i32pqUpdate(queue, u, degrees[u]);
+            break;
+          case 3: /* Sum of orders in closed list */
+            for (k=0, jj=xadj[u]; jj<xadj[u+1]; jj++) {
+              if (perm[adjncy[jj]] != -1)
+                k += perm[adjncy[jj]];
+            }
+            gk_i32pqUpdate(queue, u, k);
+            break;
+          case 4: /* Sum of order-differences (w.r.t. current number) in closed 
+                     list (updated once in a while) */
+            for (k=0, jj=xadj[u]; jj<xadj[u+1]; jj++) {
+              if (perm[adjncy[jj]] != -1)
+                k += (i-perm[adjncy[jj]]);
+            }
+            gk_i32pqUpdate(queue, u, k);
+            break;
+          default:
+            ;
+        }
+      }
+    }
+  }
+
+
+  /* time to decide what to return */
+  if (r_iperm != NULL) {
+    /* Use the 'degrees' array to build the iperm array. This has to be
+       done before perm is handed over below: once it is handed over the
+       local pointer is cleared, and reading perm[] through it would
+       dereference NULL when both outputs are requested. */
+    for (i=0; i<nvtxs; i++)
+      degrees[perm[i]] = i;
+
+    *r_iperm = degrees;
+    degrees = NULL;
+  }
+
+  if (r_perm != NULL) {
+    *r_perm = perm;
+    perm = NULL;
+  }
+
+
+  /* cleanup memory; arrays that were handed over are NULL by now */
+  gk_i32pqDestroy(queue);
+  gk_free((void **)&perm, &degrees, LTERM);
+
+}
+
+
+/*************************************************************************/
+/*! This function computes a permutation of the vertices based on a
+    best-first-traversal. It can be used for re-ordering the graph
+    to reduce its bandwidth for better cache locality.
+    Only vertices adjacent to an already ordered vertex (the open list)
+    are kept in the priority queue; when the open list becomes empty the
+    traversal is restarted from a vertex that has not been touched yet.
+
+    \param[IN]  graph is the graph structure.
+    \param[IN]  v is the starting vertex of the best-first traversal.
+    \param[IN]  type indicates the criteria to use to measure the 'bestness'
+                of a vertex; the values 1-6 are handled (see the switch
+                in the body), any other value leaves the priority of an
+                open vertex at its insertion value.
+    \param[OUT] perm[i] stores the ID of vertex i in the re-ordered graph.
+    \param[OUT] iperm[i] stores the ID of the vertex that corresponds to 
+                the ith vertex in the re-ordered graph.
+
+    \note Either r_perm or r_iperm (or both) can be NULL, in which case
+          the corresponding array is not returned.
+          The returned arrays should be freed with gk_free().
+          If graph->nvtxs <= 0 the function returns without touching
+          r_perm/r_iperm.
+*/
+/*************************************************************************/
+void gk_graph_ComputeBestFOrdering(gk_graph_t *graph, int v, int type, 
+          int32_t **r_perm, int32_t **r_iperm)
+{
+  ssize_t j, *xadj;
+  int i, u, nvtxs, nopen, ntodo;
+  int32_t *adjncy, *perm, *degrees, *sod, *level, *ot, *pos;
+  int64_t *wdegrees;
+  gk_i32pq_t *queue;
+
+  if (graph->nvtxs <= 0)
+    return;
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+
+  /* the degree of the vertices in the closed list */
+  degrees = gk_i32smalloc(nvtxs, 0, "gk_graph_ComputeBestFOrdering: degrees");
+
+  /* the weighted degree of the vertices in the closed list for type==3 */
+  wdegrees = gk_i64smalloc(nvtxs, 0, "gk_graph_ComputeBestFOrdering: wdegrees");
+
+  /* the sum of differences for type==4 */
+  sod = gk_i32smalloc(nvtxs, 0, "gk_graph_ComputeBestFOrdering: sod");
+
+  /* the encountering level of a vertex type==5 */
+  level = gk_i32smalloc(nvtxs, 0, "gk_graph_ComputeBestFOrdering: level");
+
+  /* The open+todo list of vertices. 
+     The vertices from [0..nopen) are the open vertices.
+     The vertices from [nopen..ntodo) are the todo vertices.
+     */
+  ot = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "gk_graph_ComputeBestFOrdering: ot"));
+
+  /* For a vertex that has not been explored, pos[i] is the position in the ot list. */
+  pos = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "gk_graph_ComputeBestFOrdering: pos"));
+
+  /* if perm[i] >= 0, then perm[i] is the order of vertex i; otherwise perm[i] == -1. */
+  perm = gk_i32smalloc(nvtxs, -1, "gk_graph_ComputeBestFOrdering: perm");
+
+  /* create the queue and put the starting vertex in it */
+  queue = gk_i32pqCreate(nvtxs);
+  gk_i32pqInsert(queue, v, 1);
+
+  /* put v at the front of the open list (swap it with vertex 0) */
+  pos[0] = ot[0] = v;
+  pos[v] = ot[v] = 0;
+  nopen = 1;
+  ntodo = nvtxs;
+
+  /* start processing the nodes */
+  for (i=0; i<nvtxs; i++) {
+    if (nopen == 0) { /* deal with non-connected graphs */
+      gk_i32pqInsert(queue, ot[0], 1);  
+      nopen++;
+    }
+
+    if ((v = gk_i32pqGetTop(queue)) == -1)
+      gk_errexit(SIGERR, "The priority queue got empty ahead of time [i=%d].\n", i);
+
+    if (perm[v] != -1)
+      gk_errexit(SIGERR, "The perm[%d] has already been set.\n", v);
+    perm[v] = i;
+
+    if (ot[pos[v]] != v)
+      gk_errexit(SIGERR, "Something went wrong [ot[pos[%d]]!=%d.\n", v, v);
+    if (pos[v] >= nopen)
+      gk_errexit(SIGERR, "The position of v is not in open list. pos[%d]=%d is >=%d.\n", v, pos[v], nopen);
+
+    /* remove v from the open list and re-arrange the todo part of the list:
+       the last open vertex fills v's slot and, if there are todo vertices,
+       the last todo vertex fills the slot that the open list gave up */
+    ot[pos[v]]       = ot[nopen-1];
+    pos[ot[nopen-1]] = pos[v];
+    if (ntodo > nopen) {
+      ot[nopen-1]      = ot[ntodo-1];
+      pos[ot[ntodo-1]] = nopen-1;
+    }
+    nopen--;
+    ntodo--;
+
+    for (j=xadj[v]; j<xadj[v+1]; j++) {
+      u = adjncy[j];
+      if (perm[u] == -1) {
+        /* update ot list, if u is not in the open list by putting it at the end
+           of the open list. */
+        if (degrees[u] == 0) {
+          ot[pos[u]]     = ot[nopen];
+          pos[ot[nopen]] = pos[u];
+          ot[nopen]      = u;
+          pos[u]         = nopen;
+          nopen++;
+
+          level[u] = level[v]+1;
+          gk_i32pqInsert(queue, u, 0);  
+        }
+
+
+        /* update the in-closed degree */
+        degrees[u]++;
+
+        /* update the queues based on the type */
+        switch (type) {
+          case 1: /* DFS */
+            gk_i32pqUpdate(queue, u, 1000*(i+1)+degrees[u]);
+            break;
+
+          case 2: /* Max in closed degree */
+            gk_i32pqUpdate(queue, u, degrees[u]);
+            break;
+
+          case 3: /* Sum of orders in closed list */
+            wdegrees[u] += i;
+            gk_i32pqUpdate(queue, u, (int32_t)sqrt(wdegrees[u]));
+            break;
+
+          case 4: /* Sum of order-differences */
+            /* this is handled at the end of the loop */
+            ;
+            break;
+
+          case 5: /* BFS with in degree priority */
+            gk_i32pqUpdate(queue, u, -(1000*level[u] - degrees[u]));
+            break;
+
+          case 6: /* Hybrid of 1+2 */
+            gk_i32pqUpdate(queue, u, (i+1)*degrees[u]);
+            break;
+
+          default:
+            ;
+        }
+      }
+    }
+
+    if (type == 4) { /* update all the vertices in the open list */
+      for (j=0; j<nopen; j++) {
+        u = ot[j];
+        if (perm[u] != -1)
+          gk_errexit(SIGERR, "For i=%d, the open list contains a closed vertex: ot[%zd]=%d, perm[%d]=%d.\n", i, j, u, u, perm[u]);
+        sod[u] += degrees[u];
+        /* the queue is refreshed on every step early on and only on
+           every 25th step afterwards */
+        if (i<1000 || i%25==0)
+          gk_i32pqUpdate(queue, u, sod[u]);
+      }
+    }
+
+    /*
+    for (j=0; j<ntodo; j++) {
+      if (pos[ot[j]] != j)
+        gk_errexit(SIGERR, "pos[ot[%zd]] != %zd.\n", j, j);
+    }
+    */
+
+  }
+
+
+  /* time to decide what to return */
+  if (r_iperm != NULL) {
+    /* Use the 'degrees' array to build the iperm array. This has to be
+       done before perm is handed over below: once it is handed over the
+       local pointer is cleared, and reading perm[] through it would
+       dereference NULL when both outputs are requested. */
+    for (i=0; i<nvtxs; i++)
+      degrees[perm[i]] = i;
+
+    *r_iperm = degrees;
+    degrees = NULL;
+  }
+
+  if (r_perm != NULL) {
+    *r_perm = perm;
+    perm = NULL;
+  }
+
+
+  /* cleanup memory; arrays that were handed over are NULL by now */
+  gk_i32pqDestroy(queue);
+  gk_free((void **)&perm, &degrees, &wdegrees, &sod, &ot, &pos, &level, LTERM);
+
+}
+
+
+/*************************************************************************/
+/*! This function computes the single-source shortest path lengths from the
+    root node to all the other nodes in the graph. If the graph is not
+    connected, the shortest path to the vertices in the other components
+    is -1. Each vertex is finalized the first time it leaves the priority
+    queue, so edge weights are presumably expected to be non-negative.
+
+    \param[IN]  graph is the graph structure.
+    \param[IN]  v is the root of the single-source shortest path computations.
+    \param[OUT] r_sps receives the array sps[], where sps[i] stores the length
+                of the shortest path from v to vertex i, or -1 if no such path
+                exists. The array is int32_t if graph->iadjwgt is non-NULL and
+                float otherwise (using graph->fadjwgt or, if that is NULL as
+                well, a weight of 1 for every edge). If both weight arrays
+                exist, priority is given to iadjwgt. If the graph has no
+                vertices, *r_sps is set to NULL.
+
+    \note The returned array should be freed with gk_free().
+*/
+/*************************************************************************/
+void gk_graph_SingleSourceShortestPaths(gk_graph_t *graph, int v, void **r_sps)
+{
+  ssize_t *xadj;
+  ssize_t i;   /* edge index; ssize_t to match xadj[] and avoid truncation */
+  int u, nvtxs;
+  int32_t *adjncy, *inqueue;
+
+  if (graph->nvtxs <= 0) {
+    *r_sps = NULL;  /* nothing to compute; keeps a later gk_free() safe */
+    return;
+  }
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+
+  /* inqueue[u]: 0 = never queued, 1 = in the queue, 2 = finalized */
+  inqueue = gk_i32smalloc(nvtxs, 0, "gk_graph_SingleSourceShortestPaths: inqueue");
+
+  /* determine if you will be computing using int32_t or float and proceed from there */
+  if (graph->iadjwgt != NULL) {
+    gk_i32pq_t *queue;
+    int32_t *adjwgt = graph->iadjwgt;
+    int32_t *sps;
+
+    queue = gk_i32pqCreate(nvtxs);
+    gk_i32pqInsert(queue, v, 0);
+    inqueue[v] = 1;
+
+    sps = gk_i32smalloc(nvtxs, -1, "gk_graph_SingleSourceShortestPaths: sps");
+    sps[v] = 0;
+
+    /* start processing the nodes (queue keys are the negated distances) */
+    while ((v = gk_i32pqGetTop(queue)) != -1) {
+      inqueue[v] = 2;
+
+      /* relax the adjacent edges */
+      for (i=xadj[v]; i<xadj[v+1]; i++) {
+        u = adjncy[i];
+        if (inqueue[u] == 2)
+          continue;
+
+        if (sps[u] < 0 || sps[v]+adjwgt[i] < sps[u]) {
+          sps[u] = sps[v]+adjwgt[i];
+
+          if (inqueue[u])
+            gk_i32pqUpdate(queue, u, -sps[u]);
+          else {
+            gk_i32pqInsert(queue, u, -sps[u]);
+            inqueue[u] = 1;
+          }
+        }
+      }
+    }
+
+    *r_sps = (void *)sps;
+
+    gk_i32pqDestroy(queue);
+  }
+  else {
+    gk_fpq_t *queue;
+    float *adjwgt = graph->fadjwgt;  /* NULL means every edge has weight 1 */
+    float *sps, wgt;
+
+    queue = gk_fpqCreate(nvtxs);
+    gk_fpqInsert(queue, v, 0);
+    inqueue[v] = 1;
+
+    sps = gk_fsmalloc(nvtxs, -1, "gk_graph_SingleSourceShortestPaths: sps");
+    sps[v] = 0;
+
+    /* start processing the nodes (queue keys are the negated distances) */
+    while ((v = gk_fpqGetTop(queue)) != -1) {
+      inqueue[v] = 2;
+
+      /* relax the adjacent edges */
+      for (i=xadj[v]; i<xadj[v+1]; i++) {
+        u = adjncy[i];
+        if (inqueue[u] == 2)
+          continue;
+
+        wgt = (adjwgt != NULL ? adjwgt[i] : 1.0f);
+        if (sps[u] < 0 || sps[v]+wgt < sps[u]) {
+          sps[u] = sps[v]+wgt;
+
+          if (inqueue[u])
+            gk_fpqUpdate(queue, u, -sps[u]);
+          else {
+            gk_fpqInsert(queue, u, -sps[u]);
+            inqueue[u] = 1;
+          }
+        }
+      }
+    }
+
+    *r_sps = (void *)sps;
+
+    gk_fpqDestroy(queue);
+  }
+
+  gk_free((void **)&inqueue, LTERM);
+}
+
+
+/*************************************************************************/
+/*! Sorts the adjacency lists in increasing vertex order
+    \param graph the graph itself; adjncy and any iadjwgt/fadjwgt are permuted in place.
+*/
+/**************************************************************************/
+void gk_graph_SortAdjacencies(gk_graph_t *graph)
+{
+  int32_t nvtxs, nn=0;  /* nn: length of the longest adjacency list (set below) */
+  ssize_t *xadj;
+  int32_t *adjncy;
+  int32_t *iadjwgt;
+  float *fadjwgt;
+
+  nvtxs   = graph->nvtxs;
+  xadj    = graph->xadj;
+  adjncy  = graph->adjncy;
+  iadjwgt = graph->iadjwgt;
+  fadjwgt = graph->fadjwgt;
+
+  #pragma omp parallel if (nvtxs > 100)
+  {
+    ssize_t i, j, k;
+    gk_ikv_t *cand;  /* scratch: key = neighbor id, val = original offset within its list */
+    int32_t *itwgts=NULL;
+    float *ftwgts=NULL;
+
+    #pragma omp single
+    for (i=0; i<nvtxs; i++) 
+      nn = gk_max(nn, xadj[i+1]-xadj[i]);
+    /* scratch buffers are allocated inside the parallel region, sized by nn */
+    cand   = gk_ikvmalloc(nn, "gk_graph_SortIndices: cand");
+    if (iadjwgt)
+      itwgts = gk_i32malloc(nn, "gk_graph_SortIndices: itwgts");
+    if (fadjwgt)
+      ftwgts = gk_fmalloc(nn, "gk_graph_SortIndices: ftwgts");
+    /* k flags an inversion; lists that are already in order are left untouched */
+    #pragma omp for schedule(static)
+    for (i=0; i<nvtxs; i++) {
+      for (k=0, j=xadj[i]; j<xadj[i+1]; j++) {
+        if (j > xadj[i] && adjncy[j] < adjncy[j-1])
+          k = 1; /* an inversion */
+        cand[j-xadj[i]].val = (int32_t)(j-xadj[i]);
+        cand[j-xadj[i]].key = adjncy[j];
+        if (itwgts)
+          itwgts[j-xadj[i]] = iadjwgt[j];
+        if (ftwgts)
+          ftwgts[j-xadj[i]] = fadjwgt[j];
+      }
+      if (k) {
+        gk_ikvsorti(xadj[i+1]-xadj[i], cand);  /* presumably sorts by increasing key */
+        for (j=xadj[i]; j<xadj[i+1]; j++) {
+          adjncy[j] = cand[j-xadj[i]].key;
+          if (itwgts)
+            iadjwgt[j] = itwgts[cand[j-xadj[i]].val];  /* weight follows its neighbor */
+          if (ftwgts)
+            fadjwgt[j] = ftwgts[cand[j-xadj[i]].val];
+        }
+      }
+    }
+
+    gk_free((void **)&cand, &itwgts, &ftwgts, LTERM);
+  }
+}
+
+
+/*************************************************************************/
+/*! Returns a symmetric version of a graph. The symmetric version
+    is constructed by applying an A op A^T operation, where op is one of
+    GK_CSR_SYM_SUM, GK_CSR_SYM_MIN, GK_CSR_SYM_MAX, GK_CSR_SYM_AVG.
+
+    \param graph the graph to be symmetrized,
+    \param op indicates the operation to be performed. The possible values are
+           GK_CSR_SYM_SUM, GK_CSR_SYM_MIN, GK_CSR_SYM_MAX, and GK_CSR_SYM_AVG.
+
+    \returns the symmetrized graph (xadj, adjncy and, if the input has them,
+          iadjwgt/fadjwgt). The input graph is not modified.
+
+TODO: Need to deal with all vertex attributes that currently do not get
+      copied over.
+*/
+/**************************************************************************/
+gk_graph_t *gk_graph_MakeSymmetric(gk_graph_t *graph, int op)
+{
+  ssize_t i, j, nnz;
+  int nrows, nadj, hasvals;
+  ssize_t *rowptr, *colptr, *nrowptr;
+  int *rowind, *colind, *nrowind, *marker, *ids;
+  float *rowval=NULL, *colval=NULL, *nrowval=NULL, *wgts=NULL;
+  int32_t *irowval=NULL, *icolval=NULL, *nirowval=NULL, *iwgts=NULL;
+  gk_graph_t *ngraph;
+
+  hasvals = (graph->iadjwgt != NULL || graph->fadjwgt != NULL);
+
+  nrows  = graph->nvtxs;
+  rowptr = graph->xadj;
+  rowind = graph->adjncy;
+  if (hasvals) {
+    irowval = graph->iadjwgt;
+     rowval = graph->fadjwgt;
+  }
+
+  /* create the column view (the in-edges of every vertex) for efficient processing */
+  colptr = gk_zsmalloc(nrows+1, 0, "colptr");
+  colind = gk_i32malloc(rowptr[nrows], "colind");
+  if (hasvals) {
+    if (rowval)
+      colval = gk_fmalloc(rowptr[nrows], "colval");
+    if (irowval)
+      icolval = gk_i32malloc(rowptr[nrows], "icolval");
+  }
+
+  for (i=0; i<nrows; i++) {
+    for (j=rowptr[i]; j<rowptr[i+1]; j++) 
+      colptr[rowind[j]]++;
+  }
+  MAKECSR(i, nrows, colptr);
+
+  for (i=0; i<nrows; i++) {
+    for (j=rowptr[i]; j<rowptr[i+1]; j++) {
+      colind[colptr[rowind[j]]] = i;
+      if (hasvals) {
+        if (rowval)
+          colval[colptr[rowind[j]]] = rowval[j];
+        if (irowval)
+          icolval[colptr[rowind[j]]] = irowval[j];
+      }
+      colptr[rowind[j]]++;
+    }
+  }
+  SHIFTCSR(i, nrows, colptr);
+
+  /* allocate the output; its arrays are attached to ngraph, never to the input graph */
+  ngraph = gk_graph_Create();
+  ngraph->nvtxs = graph->nvtxs;
+
+  nrowptr = ngraph->xadj = gk_zmalloc(nrows+1, "gk_csr_MakeSymmetric: nrowptr");
+  nrowind = ngraph->adjncy = gk_imalloc(2*rowptr[nrows], "gk_csr_MakeSymmetric: nrowind");
+  if (hasvals) {
+    if (rowval)
+      nrowval = ngraph->fadjwgt = gk_fmalloc(2*rowptr[nrows], "gk_csr_MakeSymmetric: nrowval");
+    if (irowval)
+      nirowval = ngraph->iadjwgt = gk_i32malloc(2*rowptr[nrows], "gk_csr_MakeSymmetric: nrowval");
+  }
+
+  marker = gk_ismalloc(nrows, -1, "marker");  /* marker[u]: slot of u in ids[], or -1 */
+  ids    = gk_imalloc(nrows, "ids");
+  if (hasvals) {
+    if (rowval)
+      wgts = gk_fmalloc(nrows, "wgts");
+    if (irowval)
+      iwgts = gk_i32malloc(nrows, "wgts");
+  }
+
+  nrowptr[0] = nnz = 0;
+  for (i=0; i<nrows; i++) {
+    nadj = 0;
+    /* out-edges */
+    for (j=rowptr[i]; j<rowptr[i+1]; j++) {
+      ids[nadj] = rowind[j]; 
+      if (wgts)
+        wgts[nadj] = (op == GK_CSR_SYM_AVG ? 0.5*rowval[j] : rowval[j]);
+      if (iwgts)
+        iwgts[nadj] = (op == GK_CSR_SYM_AVG ? 0.5*irowval[j] : irowval[j]);
+      marker[rowind[j]] = nadj++;
+    }
+
+    /* in-edges */
+    for (j=colptr[i]; j<colptr[i+1]; j++) {
+      if (marker[colind[j]] == -1) {
+        if (op != GK_CSR_SYM_MIN) {
+          ids[nadj] = colind[j]; 
+          if (wgts) 
+            wgts[nadj] = (op == GK_CSR_SYM_AVG ? 0.5*colval[j] : colval[j]);
+          if (iwgts) 
+            iwgts[nadj] = (op == GK_CSR_SYM_AVG ? 0.5*icolval[j] : icolval[j]);
+          nadj++;
+        }
+      }
+      else {
+        if (wgts) {
+          switch (op) {
+            case GK_CSR_SYM_MAX:
+              wgts[marker[colind[j]]] = gk_max(colval[j], wgts[marker[colind[j]]]);
+              break;
+            case GK_CSR_SYM_MIN:
+              wgts[marker[colind[j]]] = gk_min(colval[j], wgts[marker[colind[j]]]);
+              break;
+            case GK_CSR_SYM_SUM:
+              wgts[marker[colind[j]]] += colval[j];
+              break;
+            case GK_CSR_SYM_AVG:
+              /* NOTE(review): the out-edge weight was already halved above, so this gives 0.25*out + 0.5*in -- confirm */
+              wgts[marker[colind[j]]] = 0.5*(wgts[marker[colind[j]]] + colval[j]);
+              break;
+            default:
+              errexit("Unsupported op for MakeSymmetric!\n");
+          }
+        }
+        if (iwgts) {
+          switch (op) {
+            case GK_CSR_SYM_MAX:
+              iwgts[marker[colind[j]]] = gk_max(icolval[j], iwgts[marker[colind[j]]]);
+              break;
+            case GK_CSR_SYM_MIN:
+              iwgts[marker[colind[j]]] = gk_min(icolval[j], iwgts[marker[colind[j]]]);
+              break;
+            case GK_CSR_SYM_SUM:
+              iwgts[marker[colind[j]]] += icolval[j];
+              break;
+            case GK_CSR_SYM_AVG:
+              iwgts[marker[colind[j]]] = 0.5*(iwgts[marker[colind[j]]] + icolval[j]);
+              break;
+            default:
+              errexit("Unsupported op for MakeSymmetric!\n");
+          }
+        }
+        marker[colind[j]] = -1;
+      }
+    }
+
+    /* go over out edges again to resolve any edges that were not found in the in
+     * edges */
+    for (j=rowptr[i]; j<rowptr[i+1]; j++) {
+      if (marker[rowind[j]] != -1) {
+        if (op == GK_CSR_SYM_MIN)
+          ids[marker[rowind[j]]] = -1;
+        marker[rowind[j]] = -1;
+      }
+    }
+
+    /* put the non '-1' entries in ids[] into i's row */
+    for (j=0; j<nadj; j++) {
+      if (ids[j] != -1) {
+        nrowind[nnz] = ids[j];
+        if (wgts)
+          nrowval[nnz] = wgts[j];
+        if (iwgts)
+          nirowval[nnz] = iwgts[j];
+        nnz++;
+      }
+    }
+    nrowptr[i+1] = nnz;
+  }
+
+  gk_free((void **)&colptr, &colind, &colval, &icolval, &marker, &ids, &wgts, &iwgts, LTERM);
+  return ngraph;
+}
+
+
+
+#ifdef XXX
+/* NOTE: the code from here to the matching #endif is only built when XXX is defined */
+/*************************************************************************/
+/*! Returns a subgraph containing a certain set of rows.
+    \param graph is the original graph.
+    \param nrows is the number of rows to extract.
+    \param rind is the set of row numbers to extract.
+    \returns the row structure of the newly created subgraph.
+*/
+/**************************************************************************/
+gk_graph_t *gk_graph_ExtractRows(gk_graph_t *graph, int nrows, int *rind)
+{
+  ssize_t i, ii, j, nnz;
+  gk_graph_t *ngraph;
+
+  ngraph = gk_graph_Create();
+
+  ngraph->nrows = nrows;
+  ngraph->ncols = graph->ncols;
+  /* total number of entries in the selected rows, used to size the new arrays */
+  for (nnz=0, i=0; i<nrows; i++)  
+    nnz += graph->rowptr[rind[i]+1]-graph->rowptr[rind[i]];
+
+  ngraph->rowptr = gk_zmalloc(ngraph->nrows+1, "gk_graph_ExtractPartition: rowptr");
+  ngraph->rowind = gk_imalloc(nnz, "gk_graph_ExtractPartition: rowind");
+  ngraph->rowval = gk_fmalloc(nnz, "gk_graph_ExtractPartition: rowval");
+  /* copy the selected rows back-to-back, recording where each one ends */
+  ngraph->rowptr[0] = 0;
+  for (nnz=0, j=0, ii=0; ii<nrows; ii++) {
+    i = rind[ii];
+    gk_icopy(graph->rowptr[i+1]-graph->rowptr[i], graph->rowind+graph->rowptr[i], ngraph->rowind+nnz);
+    gk_fcopy(graph->rowptr[i+1]-graph->rowptr[i], graph->rowval+graph->rowptr[i], ngraph->rowval+nnz);
+    nnz += graph->rowptr[i+1]-graph->rowptr[i];
+    ngraph->rowptr[++j] = nnz;
+  }
+  ASSERT(j == ngraph->nrows);
+
+  return ngraph;
+}
+
+
+/*************************************************************************/
+/*! Returns a subgraph corresponding to a specified partitioning of rows.
+    \param graph is the original graph.
+    \param part is the partitioning vector of the rows.
+    \param pid is the partition ID that will be extracted.
+    \returns the row structure of the newly created subgraph.
+*/
+/**************************************************************************/
+gk_graph_t *gk_graph_ExtractPartition(gk_graph_t *graph, int *part, int pid)
+{
+  ssize_t i, j, nnz;
+  gk_graph_t *ngraph;
+
+  ngraph = gk_graph_Create();
+
+  ngraph->nrows = 0;
+  ngraph->ncols = graph->ncols;
+  /* count the rows assigned to pid and the entries they hold */
+  for (nnz=0, i=0; i<graph->nrows; i++) {
+    if (part[i] == pid) {
+      ngraph->nrows++;
+      nnz += graph->rowptr[i+1]-graph->rowptr[i];
+    }
+  }
+
+  ngraph->rowptr = gk_zmalloc(ngraph->nrows+1, "gk_graph_ExtractPartition: rowptr");
+  ngraph->rowind = gk_imalloc(nnz, "gk_graph_ExtractPartition: rowind");
+  ngraph->rowval = gk_fmalloc(nnz, "gk_graph_ExtractPartition: rowval");
+  /* copy the rows of partition pid, in their original order */
+  ngraph->rowptr[0] = 0;
+  for (nnz=0, j=0, i=0; i<graph->nrows; i++) {
+    if (part[i] == pid) {
+      gk_icopy(graph->rowptr[i+1]-graph->rowptr[i], graph->rowind+graph->rowptr[i], ngraph->rowind+nnz);
+      gk_fcopy(graph->rowptr[i+1]-graph->rowptr[i], graph->rowval+graph->rowptr[i], ngraph->rowval+nnz);
+      nnz += graph->rowptr[i+1]-graph->rowptr[i];
+      ngraph->rowptr[++j] = nnz;
+    }
+  }
+  ASSERT(j == ngraph->nrows);
+
+  return ngraph;
+}
+
+
+/*************************************************************************/
+/*! Splits the graph into multiple sub-graphs based on the provided
+    color array.
+    \param graph is the original graph.
+    \param color is an array of size equal to the number of non-zeros
+           in the graph (row-wise structure). The graph is split into
+           as many parts as the number of colors. For meaningful results,
+           the colors should be numbered consecutively starting from 0.
+    \returns an array of graphs, one for each supplied color number.
+*/
+/**************************************************************************/
+gk_graph_t **gk_graph_Split(gk_graph_t *graph, int *color)
+{
+  ssize_t i, j;
+  int nrows, ncolors;
+  ssize_t *rowptr;
+  int *rowind;
+  float *rowval;
+  gk_graph_t **sgraphs;
+
+  nrows  = graph->nrows;
+  rowptr = graph->rowptr;
+  rowind = graph->rowind;
+  rowval = graph->rowval;
+  /* gk_imax presumably returns the largest entry of color[], so this is max color + 1 */
+  ncolors = gk_imax(rowptr[nrows], color)+1;
+
+  sgraphs = (gk_graph_t **)gk_malloc(sizeof(gk_graph_t *)*ncolors, "gk_graph_Split: sgraphs");
+  for (i=0; i<ncolors; i++) {
+    sgraphs[i] = gk_graph_Create();
+    sgraphs[i]->nrows  = graph->nrows;
+    sgraphs[i]->ncols  = graph->ncols;
+    sgraphs[i]->rowptr = gk_zsmalloc(nrows+1, 0, "gk_graph_Split: sgraphs[i]->rowptr"); 
+  }
+  /* first pass: count, per color, the entries of each row */
+  for (i=0; i<nrows; i++) {
+    for (j=rowptr[i]; j<rowptr[i+1]; j++) 
+      sgraphs[color[j]]->rowptr[i]++;
+  }
+  for (i=0; i<ncolors; i++) 
+    MAKECSR(j, nrows, sgraphs[i]->rowptr);
+
+  for (i=0; i<ncolors; i++) {
+    sgraphs[i]->rowind = gk_imalloc(sgraphs[i]->rowptr[nrows], "gk_graph_Split: sgraphs[i]->rowind"); 
+    sgraphs[i]->rowval = gk_fmalloc(sgraphs[i]->rowptr[nrows], "gk_graph_Split: sgraphs[i]->rowval"); 
+  }
+  /* second pass: scatter every entry into the arrays of its color */
+  for (i=0; i<nrows; i++) {
+    for (j=rowptr[i]; j<rowptr[i+1]; j++) {
+      sgraphs[color[j]]->rowind[sgraphs[color[j]]->rowptr[i]] = rowind[j];
+      sgraphs[color[j]]->rowval[sgraphs[color[j]]->rowptr[i]] = rowval[j];
+      sgraphs[color[j]]->rowptr[i]++;
+    }
+  }
+
+  for (i=0; i<ncolors; i++) 
+    SHIFTCSR(j, nrows, sgraphs[i]->rowptr);
+
+  return sgraphs;
+}
+
+
+/*************************************************************************/
+/*! Prunes certain rows/columns of the graph. The pruning takes place 
+    by analyzing the row structure of the graph. The pruning takes place
+    by removing rows/columns but it does not affect the numbering of the
+    remaining rows/columns.
+   
+    \param graph the graph to be pruned,
+    \param what indicates if the rows (GK_CSR_ROW) or the columns (GK_CSR_COL)
+           of the graph will be pruned,
+    \param minf is the minimum number of rows (columns) that a column (row) must
+           be present in order to be kept,
+    \param maxf is the maximum number of rows (columns) that a column (row) must
+          be present at in order to be kept.
+    \returns the pruned graph consisting only of its row-based structure. 
+          The input graph is not modified. 
+*/
+/**************************************************************************/
+gk_graph_t *gk_graph_Prune(gk_graph_t *graph, int what, int minf, int maxf)
+{
+  ssize_t i, j, nnz;
+  int nrows, ncols;
+  ssize_t *rowptr, *nrowptr;
+  int *rowind, *nrowind, *collen;
+  float *rowval, *nrowval;
+  gk_graph_t *ngraph;
+
+  ngraph = gk_graph_Create();
+  
+  nrows = ngraph->nrows = graph->nrows;
+  ncols = ngraph->ncols = graph->ncols;
+
+  rowptr = graph->rowptr;
+  rowind = graph->rowind;
+  rowval = graph->rowval;
+
+  nrowptr = ngraph->rowptr = gk_zmalloc(nrows+1, "gk_graph_Prune: nrowptr");
+  nrowind = ngraph->rowind = gk_imalloc(rowptr[nrows], "gk_graph_Prune: nrowind");
+  nrowval = ngraph->rowval = gk_fmalloc(rowptr[nrows], "gk_graph_Prune: nrowval");
+
+
+  switch (what) {
+    case GK_CSR_COL:
+      collen = gk_ismalloc(ncols, 0, "gk_graph_Prune: collen");
+
+      for (i=0; i<nrows; i++) {
+        for (j=rowptr[i]; j<rowptr[i+1]; j++) {
+          ASSERT(rowind[j] < ncols);
+          collen[rowind[j]]++;
+        }
+      }
+      for (i=0; i<ncols; i++)  /* turn the column counts into 0/1 keep flags */
+        collen[i] = (collen[i] >= minf && collen[i] <= maxf ? 1 : 0);
+
+      nrowptr[0] = 0;
+      for (nnz=0, i=0; i<nrows; i++) {
+        for (j=rowptr[i]; j<rowptr[i+1]; j++) {
+          if (collen[rowind[j]]) {
+            nrowind[nnz] = rowind[j];
+            nrowval[nnz] = rowval[j];
+            nnz++;
+          }
+        }
+        nrowptr[i+1] = nnz;
+      }
+      gk_free((void **)&collen, LTERM);
+      break;
+
+    case GK_CSR_ROW:
+      nrowptr[0] = 0;
+      for (nnz=0, i=0; i<nrows; i++) {
+        if (rowptr[i+1]-rowptr[i] >= minf && rowptr[i+1]-rowptr[i] <= maxf) {
+          for (j=rowptr[i]; j<rowptr[i+1]; j++, nnz++) {
+            nrowind[nnz] = rowind[j];
+            nrowval[nnz] = rowval[j];
+          }
+        }
+        nrowptr[i+1] = nnz;  /* a pruned row stays in place as an empty row */
+      }
+      break;
+
+    default:
+      gk_graph_Free(&ngraph);
+      gk_errexit(SIGERR, "Unknown prunning type of %d\n", what);
+      return NULL;
+  }
+
+  return ngraph;
+}
+
+
+
+/*************************************************************************/
+/*! Normalizes the rows/columns of the graph to be unit 
+    length. Rows/columns whose norm sum is not positive are left unchanged.
+    \param graph the graph itself,
+    \param what indicates what will be normalized and is obtained by
+           specifying GK_CSR_ROW, GK_CSR_COL, GK_CSR_ROW|GK_CSR_COL. 
+    \param norm indicates what norm is to normalize to, 1: 1-norm, 2: 2-norm
+*/
+/**************************************************************************/
+void gk_graph_Normalize(gk_graph_t *graph, int what, int norm)
+{
+  ssize_t i, j;
+  int n;
+  ssize_t *ptr;
+  float *val, sum;
+
+  if (what&GK_CSR_ROW && graph->rowval) {
+    n   = graph->nrows;
+    ptr = graph->rowptr;
+    val = graph->rowval;
+
+    #pragma omp parallel if (ptr[n] > OMPMINOPS) 
+    {
+      #pragma omp for private(j,sum) schedule(static)
+      for (i=0; i<n; i++) {
+        for (sum=0.0, j=ptr[i]; j<ptr[i+1]; j++){
+  	if (norm == 2)
+  	  sum += val[j]*val[j];
+  	else if (norm == 1)
+  	  sum += val[j]; /* assume val[j] > 0 */ 
+        }
+        if (sum > 0) {
+  	if (norm == 2)
+  	  sum=1.0/sqrt(sum); 
+  	else if (norm == 1)
+  	  sum=1.0/sum; 
+          for (j=ptr[i]; j<ptr[i+1]; j++)
+            val[j] *= sum;
+  	
+        }
+      }
+    }
+  }
+
+  if (what&GK_CSR_COL && graph->colval) {
+    n   = graph->ncols;
+    ptr = graph->colptr;
+    val = graph->colval;
+
+    #pragma omp parallel if (ptr[n] > OMPMINOPS)
+    {
+    #pragma omp for private(j,sum) schedule(static)
+      for (i=0; i<n; i++) {
+        for (sum=0.0, j=ptr[i]; j<ptr[i+1]; j++)
+  	if (norm == 2)
+  	  sum += val[j]*val[j];
+  	else if (norm == 1)
+  	  sum += val[j]; 
+        if (sum > 0) {
+  	if (norm == 2)
+  	  sum=1.0/sqrt(sum); 
+  	else if (norm == 1)
+  	  sum=1.0/sum; 
+          for (j=ptr[i]; j<ptr[i+1]; j++)
+            val[j] *= sum;
+        }
+      }
+    }
+  }
+}
+
+
+#endif
diff --git a/htable.c b/htable.c
new file mode 100644
index 0000000..078e114
--- /dev/null
+++ b/htable.c
@@ -0,0 +1,247 @@
+/*
+ * Copyright 2004, Regents of the University of Minnesota
+ *
+ * This file contains routines for manipulating a direct-access hash table
+ *
+ * Started 3/22/04
+ * George
+ *
+ */
+
+#include <GKlib.h>
+
+/******************************************************************************
+* This function allocates a hash-table with nelements slots and returns it
+* with every slot marked empty (see HTable_Reset)
+*******************************************************************************/
+gk_HTable_t *HTable_Create(int nelements)
+{
+  gk_HTable_t *table = gk_malloc(sizeof(gk_HTable_t), "HTable_Create: htable");
+
+  table->nelements = nelements;
+  table->harray    = gk_ikvmalloc(nelements, "HTable_Create: harray");
+
+  /* mark all the slots as empty and zero the element count */
+  HTable_Reset(table);
+  return table;
+}
+
+
+/******************************************************************************
+* This function marks every slot of the hash-table as empty and zeroes its size
+*******************************************************************************/
+void HTable_Reset(gk_HTable_t *htable)
+{
+  int slot = htable->nelements;
+
+  while (slot-- > 0)
+    htable->harray[slot].key = HTABLE_EMPTY;
+
+  htable->htsize = 0;
+}
+
+/******************************************************************************
+* This function resizes the hash-table, re-inserting its live entries
+*******************************************************************************/
+void HTable_Resize(gk_HTable_t *htable, int nelements)
+{
+  int i, old_nelements;
+  gk_ikv_t *old_harray;
+
+  old_nelements = htable->nelements;
+  old_harray = htable->harray;
+
+  /* prepare the new, empty hash */
+  htable->nelements = nelements;
+  htable->htsize = 0;
+  htable->harray = gk_ikvmalloc(nelements, "HTable_Resize: harray");
+  for (i=0; i<nelements; i++)
+    htable->harray[i].key = HTABLE_EMPTY;
+
+  /* reassign the values; tombstones left by deletions are dropped, not re-inserted */
+  for (i=0; i<old_nelements; i++)
+    if (old_harray[i].key != HTABLE_EMPTY && old_harray[i].key != HTABLE_DELETED)
+       HTable_Insert(htable, old_harray[i].key, old_harray[i].val);
+
+  /* remove old harray */
+  gk_free((void **)&old_harray, LTERM);
+}
+
+
+/******************************************************************************
+* This function inserts a key-value pair in the array. The table is doubled
+* first when more than half of its slots are in use; the pair then goes into
+* the first empty or deleted slot found by probing linearly, with wrap-around,
+* from the key's hash position. No check is made for an existing entry with
+* the same key.
+*******************************************************************************/
+void HTable_Insert(gk_HTable_t *htable, int key, int val)
+{
+  int probe, nprobed, slotkey;
+
+  /* grow before hashing, since the hash position depends on the table size */
+  if (htable->htsize > htable->nelements/2)
+    HTable_Resize(htable, 2*htable->nelements);
+
+  probe = HTable_HFunction(htable->nelements, key);
+
+  for (nprobed=0; nprobed<htable->nelements; nprobed++) {
+    slotkey = htable->harray[probe].key;
+    if (slotkey == HTABLE_EMPTY || slotkey == HTABLE_DELETED) {
+      htable->harray[probe].key = key;
+      htable->harray[probe].val = val;
+      htable->htsize++;
+      return;
+    }
+
+    if (++probe == htable->nelements)
+      probe = 0;  /* wrap around to the start of the array */
+  }
+  /* no free slot was found: the pair is not stored */
+}
+
+
+/******************************************************************************
+* This function deletes key from the htable: the first slot holding key,
+* probing linearly (with wrap-around) from the key's hash position, is
+* marked as deleted. Nothing happens if the key is not present.
+*******************************************************************************/
+void HTable_Delete(gk_HTable_t *htable, int key)
+{
+  int slot, nleft;
+
+  slot = HTable_HFunction(htable->nelements, key);
+
+  for (nleft=htable->nelements; nleft>0; nleft--) {
+    if (htable->harray[slot].key == key) {
+      /* leave a tombstone rather than an empty slot, so searches that stop
+         at the first empty slot still reach entries stored past this one */
+      htable->harray[slot].key = HTABLE_DELETED;
+      htable->htsize--;
+      return;
+    }
+
+    /* advance to the next slot, wrapping at the end of the array */
+    slot++;
+    if (slot == htable->nelements)
+      slot = 0;
+  }
+}
+
+
+/******************************************************************************
+* This function returns the value stored with key in the hash-table, or -1 if
+* the key is not found. Probing starts at the key's hash position and stops
+* at the first empty slot or after every slot has been examined.
+*******************************************************************************/
+int HTable_Search(gk_HTable_t *htable, int key)
+{
+  int slot, nleft, slotkey;
+
+  slot = HTable_HFunction(htable->nelements, key);
+
+  for (nleft=htable->nelements; nleft>0; nleft--) {
+    slotkey = htable->harray[slot].key;
+
+    if (slotkey == key)
+      return htable->harray[slot].val;
+    if (slotkey == HTABLE_EMPTY)
+      break;  /* end of the probe chain: the key is absent */
+
+    if (++slot == htable->nelements)
+      slot = 0;
+  }
+
+  return -1;
+}
+
+
+/******************************************************************************
+* This function returns the next key/val: it iterates over the entries whose
+* key equals key. Call it with type == HTABLE_FIRST to start a scan at the
+* key's hash position and with any other type to continue it. Each match
+* stores its value in *r_val and returns 1; -1 is returned once an empty slot
+* is reached or every slot has been probed. The scan state lives in static
+* variables, so only one scan can be in progress at a time. (The previous
+* 'first > last' guard was never true, so no entry was ever returned.)
+*******************************************************************************/
+int HTable_GetNext(gk_HTable_t *htable, int key, int *r_val, int type)
+{
+  int i;
+  static int next, nleft;  /* next slot to probe, number of slots left to probe */
+
+  if (type == HTABLE_FIRST) {
+    next  = HTable_HFunction(htable->nelements, key);
+    nleft = htable->nelements;
+  }
+
+  while (nleft > 0) {
+    i = next;
+    next = (next+1 < htable->nelements ? next+1 : 0);  /* wrap around */
+    nleft--;
+
+    if (htable->harray[i].key == key) {
+      *r_val = htable->harray[i].val;
+      return 1;
+    }
+    if (htable->harray[i].key == HTABLE_EMPTY) {
+      nleft = 0;  /* end of the probe chain; later calls also return -1 */
+      break;
+    }
+  }
+
+  return -1;
+}
+
+
+/******************************************************************************
+* This function removes key from the hash-table and returns the value that was
+* stored with it. Slots are probed linearly, with wrap-around, from the key's
+* hash position. Reaching an empty slot means the key is absent, which is
+* reported through gk_errexit(); -1 is returned if every slot was probed
+* without finding the key.
+*******************************************************************************/
+int HTable_SearchAndDelete(gk_HTable_t *htable, int key)
+{
+  int slot, nleft;
+  gk_ikv_t *entry;
+
+  slot = HTable_HFunction(htable->nelements, key);
+
+  for (nleft=htable->nelements; nleft>0; nleft--) {
+    entry = htable->harray + slot;
+
+    if (entry->key == key) {
+      /* tombstone the slot; its val field is left untouched */
+      entry->key = HTABLE_DELETED;
+      htable->htsize--;
+      return entry->val;
+    }
+    if (entry->key == HTABLE_EMPTY)
+      gk_errexit(SIGERR, "HTable_SearchAndDelete: Failed to find the key!\n");
+
+    if (++slot == htable->nelements)
+      slot = 0;
+  }
+
+  return -1;
+}
+
+
+/******************************************************************************
+* This function frees the slot array and then the hash-table structure itself
+*******************************************************************************/
+void HTable_Destroy(gk_HTable_t *htable)
+{
+  gk_free((void **)&htable->harray, LTERM);
+  gk_free((void **)&htable, LTERM);
+}
+
+/******************************************************************************
+* This is the hash-function: key modulo table size, folded into [0, nelements)
+*******************************************************************************/
+int HTable_HFunction(int nelements, int key)
+{
+  int h = key%nelements;  /* has the sign of key, so it can be negative */
+  return (h < 0 ? h+nelements : h);
+}
diff --git a/io.c b/io.c
new file mode 100644
index 0000000..289b401
--- /dev/null
+++ b/io.c
@@ -0,0 +1,681 @@
+/*!
+\file  io.c
+\brief Various file I/O functions.
+
+This file contains various functions that perform I/O.
+
+\date Started 4/10/95
+\author George
+\version\verbatim $Id: io.c 18951 2015-08-08 20:10:46Z karypis $ \endverbatim
+*/
+
+#ifdef HAVE_GETLINE
+/* Get getline to be defined. */
+#define _GNU_SOURCE
+#include <stdio.h>
+#undef _GNU_SOURCE
+#endif
+
+#include <GKlib.h>
+
+/*************************************************************************/
+/*! This function opens a file.
+    \param fname is the name of the file to open.
+    \param mode is the fopen() mode string.
+    \param msg is a caller-supplied tag that is included in the diagnostic
+           printed when the file cannot be opened.
+    \returns the opened stream. On failure, the reason is printed via 
+             perror() and errexit() is called; NULL is returned if 
+             errexit() hands control back.
+*/
+/*************************************************************************/
+FILE *gk_fopen(char *fname, char *mode, const char *msg)
+{
+  FILE *fp;
+  char errmsg[8192];
+
+  fp = fopen(fname, mode);
+  if (fp != NULL)
+    return fp;
+
+  /* Bounded formatting: an over-long fname/msg is truncated rather than 
+     written past the end of errmsg. */
+  snprintf(errmsg, sizeof(errmsg), "file: %s, mode: %s, [%s]", fname, mode, msg);
+  perror(errmsg);
+  errexit("Failed on gk_fopen()\n");
+
+  return NULL;
+}
+
+
+/*************************************************************************/
+/*! This function closes a file.
+    \param fp is the stream to close. A NULL stream is ignored, since 
+           passing NULL to fclose() is undefined behaviour.
+*/
+/*************************************************************************/
+void gk_fclose(FILE *fp)
+{
+  if (fp == NULL)
+    return;
+
+  fclose(fp);
+}
+
+
+/*************************************************************************/
+/*! This function is a wrapper around the read() function that keeps 
+    issuing read requests until 'count' bytes have been read.
+    Fewer than 'count' bytes are returned only when a read() reports that 
+    no more data is available (i.e., it returns 0).
+    \param fd is the file descriptor to read from.
+    \param vbuf is the buffer that receives the data.
+    \param count is the number of bytes requested.
+    \returns the number of bytes stored in vbuf, or -1 if any read() 
+             request failed.
+*/
+/*************************************************************************/
+ssize_t gk_read(int fd, void *vbuf, size_t count)
+{
+  char *cursor = (char *)vbuf;
+  ssize_t remaining = count;
+  ssize_t got;
+
+  for (;;) {
+    got = read(fd, cursor, remaining);
+    if (got == -1)
+      return -1;
+
+    cursor    += got;
+    remaining -= got;
+
+    /* done once everything arrived or the last request returned nothing */
+    if (remaining <= 0 || got <= 0)
+      break;
+  }
+
+  return count-remaining;
+}
+
+
+/*************************************************************************/
+/*! This function is a wrapper around the write() function that keeps 
+    issuing write requests until all 'count' bytes have been written.
+    \param fd is the file descriptor to write to.
+    \param vbuf is the buffer holding the data.
+    \param count is the number of bytes to write.
+    \returns 'count' once everything has been written, or -1 if any 
+             write() request failed.
+*/
+/*************************************************************************/
+ssize_t gk_write(int fd, void *vbuf, size_t count)
+{
+  char *cursor = (char *)vbuf;
+  ssize_t remaining = count;
+  ssize_t sent;
+
+  for (;;) {
+    sent = write(fd, cursor, remaining);
+    if (sent == -1)
+      return -1;
+
+    cursor    += sent;
+    remaining -= sent;
+
+    if (remaining <= 0)
+      break;
+  }
+
+  return count;
+}
+
+
+/*************************************************************************/
+/*! This function is the GKlib implementation of glibc's getline()
+    function. When HAVE_GETLINE is defined it simply forwards to getline().
+    \param lineptr points to the line buffer; if *lineptr is NULL (or *n 
+           is 0) a 1024-byte buffer is allocated. The buffer is grown as 
+           needed and always '\0'-terminated on return.
+    \param n points to the current size of *lineptr and is updated 
+           whenever the buffer is (re)allocated.
+    \param stream is the stream to read from.
+    \returns -1 if the EOF has been reached, otherwise it returns the 
+             number of bytes read (including the '\n', if one was read).
+*/
+/*************************************************************************/
+ssize_t gk_getline(char **lineptr, size_t *n, FILE *stream)
+{
+#ifdef HAVE_GETLINE
+  return getline(lineptr, n, stream);
+#else
+  size_t i;
+  int ch;
+
+  if (feof(stream))
+    return -1;  
+
+  /* Initial memory allocation if *lineptr is NULL */
+  if (*lineptr == NULL || *n == 0) {
+    *n = 1024;
+    *lineptr = gk_malloc((*n)*sizeof(char), "gk_getline: lineptr");
+  }
+
+  /* get into the main loop */
+  i = 0;
+  while ((ch = getc(stream)) != EOF) {
+    (*lineptr)[i++] = (char)ch;
+
+    /* Reallocate memory once only the slot for '\0' is left. The test is 
+       >= (not ==) so that a caller-supplied buffer with *n == 1 is also 
+       grown instead of being overrun. */
+    if (i+1 >= *n) { 
+      *n = 2*(*n);
+      *lineptr = gk_realloc(*lineptr, (*n)*sizeof(char), "gk_getline: lineptr");
+    }
+      
+    if (ch == '\n')
+      break;
+  }
+  (*lineptr)[i] = '\0';
+
+  return (i == 0 ? (ssize_t)-1 : (ssize_t)i);
+#endif
+}
+
+
+/*************************************************************************/
+/*! This function reads the contents of a text file and returns it in the
+    form of an array of strings, one per line, with any trailing '\n' and
+    '\r' characters removed.
+    \param fname is the name of the file
+    \param r_nlines is the number of lines that were read. If it is NULL,
+           this information is not returned.
+    \returns the array of lines, or NULL if gk_getfilestats() reported 
+             that the file has no lines.
+*/
+/*************************************************************************/
+char **gk_readfile(char *fname, size_t *r_nlines)
+{
+  size_t lnlen=0, nlines=0, nalloc=0;
+  char *line=NULL, **lines=NULL;
+  FILE *fpin;
+
+  /* The first pass over the file only provides the initial array size */
+  gk_getfilestats(fname, &nalloc, NULL, NULL, NULL);
+  if (nalloc > 0) {
+    lines = (char **)gk_malloc(nalloc*sizeof(char *), "gk_readfile: lines");
+
+    fpin = gk_fopen(fname, "r", "gk_readfile");
+    while (gk_getline(&line, &lnlen, fpin) != -1) {
+      /* The file is re-opened for this pass, so it may yield more lines 
+         than were counted; grow the array instead of writing past it. */
+      if (nlines == nalloc) {
+        nalloc *= 2;
+        lines = (char **)gk_realloc(lines, nalloc*sizeof(char *), "gk_readfile: lines");
+      }
+
+      gk_strtprune(line, "\n\r");
+      lines[nlines++] = gk_strdup(line);
+    }
+    gk_fclose(fpin);
+  }
+
+  gk_free((void **)&line, LTERM);
+
+  if (r_nlines != NULL)
+    *r_nlines  = nlines;
+
+  return lines;
+}
+
+
+/*************************************************************************/
+/*! This function reads the contents of a text file and returns it in the
+    form of an array of int32_t, parsing one value from each line.
+    The result of sscanf() is not checked, so a line that does not start 
+    with a number still consumes an array slot whose value is left unset.
+    \param fname is the name of the file
+    \param r_nlines is the number of lines that were read. If it is NULL,
+           this information is not returned.
+    \returns the array of values, or NULL if gk_getfilestats() reported 
+             that the file has no lines.
+*/
+/*************************************************************************/
+int32_t *gk_i32readfile(char *fname, size_t *r_nlines)
+{
+  size_t lnlen=0, nlines=0, nalloc=0;
+  char *line=NULL;
+  int32_t *array=NULL;
+  FILE *fpin;
+
+  /* The first pass over the file only provides the initial array size */
+  gk_getfilestats(fname, &nalloc, NULL, NULL, NULL);
+  if (nalloc > 0) {
+    array = gk_i32malloc(nalloc, "gk_i32readfile: array");
+
+    fpin = gk_fopen(fname, "r", "gk_i32readfile");
+
+    while (gk_getline(&line, &lnlen, fpin) != -1) {
+      /* The file is re-opened for this pass, so it may yield more lines 
+         than were counted; grow the array instead of writing past it. */
+      if (nlines == nalloc) {
+        nalloc *= 2;
+        array = (int32_t *)gk_realloc(array, nalloc*sizeof(int32_t), "gk_i32readfile: array");
+      }
+
+      sscanf(line, "%"SCNd32, &array[nlines++]);
+    }
+
+    gk_fclose(fpin);
+  }
+
+  gk_free((void **)&line, LTERM);
+
+  if (r_nlines != NULL)
+    *r_nlines  = nlines;
+
+  return array;
+}
+
+/*************************************************************************/
+/*! This function reads the contents of a text file and returns it in the
+    form of an array of int64_t, parsing one value from each line.
+    The result of sscanf() is not checked, so a line that does not start 
+    with a number still consumes an array slot whose value is left unset.
+    \param fname is the name of the file
+    \param r_nlines is the number of lines that were read. If it is NULL,
+           this information is not returned.
+    \returns the array of values, or NULL if gk_getfilestats() reported 
+             that the file has no lines.
+*/
+/*************************************************************************/
+int64_t *gk_i64readfile(char *fname, size_t *r_nlines)
+{
+  size_t lnlen=0, nlines=0, nalloc=0;
+  char *line=NULL;
+  int64_t *array=NULL;
+  FILE *fpin;
+
+  /* The first pass over the file only provides the initial array size */
+  gk_getfilestats(fname, &nalloc, NULL, NULL, NULL);
+  if (nalloc > 0) {
+    array = gk_i64malloc(nalloc, "gk_i64readfile: array");
+
+    fpin = gk_fopen(fname, "r", "gk_i64readfile");
+
+    while (gk_getline(&line, &lnlen, fpin) != -1) {
+      /* The file is re-opened for this pass, so it may yield more lines 
+         than were counted; grow the array instead of writing past it. */
+      if (nlines == nalloc) {
+        nalloc *= 2;
+        array = (int64_t *)gk_realloc(array, nalloc*sizeof(int64_t), "gk_i64readfile: array");
+      }
+
+      sscanf(line, "%"SCNd64, &array[nlines++]);
+    }
+
+    gk_fclose(fpin);
+  }
+
+  gk_free((void **)&line, LTERM);
+
+  if (r_nlines != NULL)
+    *r_nlines  = nlines;
+
+  return array;
+}
+
+/*************************************************************************/
+/*! This function reads the contents of a text file and returns it in the
+    form of an array of ssize_t, parsing one value from each line.
+    The result of sscanf() is not checked, so a line that does not start 
+    with a number still consumes an array slot whose value is left unset.
+    \param fname is the name of the file
+    \param r_nlines is the number of lines that were read. If it is NULL,
+           this information is not returned.
+    \returns the array of values, or NULL if gk_getfilestats() reported 
+             that the file has no lines.
+*/
+/*************************************************************************/
+ssize_t *gk_zreadfile(char *fname, size_t *r_nlines)
+{
+  size_t lnlen=0, nlines=0, nalloc=0;
+  char *line=NULL;
+  ssize_t *array=NULL;
+  FILE *fpin;
+
+  /* The first pass over the file only provides the initial array size */
+  gk_getfilestats(fname, &nalloc, NULL, NULL, NULL);
+  if (nalloc > 0) {
+    array = gk_zmalloc(nalloc, "gk_zreadfile: array");
+
+    fpin = gk_fopen(fname, "r", "gk_zreadfile");
+
+    while (gk_getline(&line, &lnlen, fpin) != -1) {
+      /* The file is re-opened for this pass, so it may yield more lines 
+         than were counted; grow the array instead of writing past it. */
+      if (nlines == nalloc) {
+        nalloc *= 2;
+        array = (ssize_t *)gk_realloc(array, nalloc*sizeof(ssize_t), "gk_zreadfile: array");
+      }
+
+      sscanf(line, "%zd", &array[nlines++]);
+    }
+
+    gk_fclose(fpin);
+  }
+
+  gk_free((void **)&line, LTERM);
+
+  if (r_nlines != NULL)
+    *r_nlines  = nlines;
+
+  return array;
+}
+
+/*************************************************************************/
+/*! This function reads the contents of a binary file and returns it in the
+    form of an array of char.
+    \param fname is the name of the file
+    \param r_nelmnts is the number of elements (bytes) that were read. It 
+           is set to 0 on failure. If it is NULL, this information is not 
+           returned.
+    \returns the array holding the file's contents. Failures are reported 
+             via gk_errexit(); NULL is returned if it hands control back.
+*/
+/*************************************************************************/
+char *gk_creadfilebin(char *fname, size_t *r_nelmnts)
+{
+  size_t nelmnts;
+  ssize_t fsize;
+  char *array=NULL;
+  FILE *fpin;
+
+  if (r_nelmnts != NULL)
+    *r_nelmnts = 0;
+
+  fsize = gk_getfsize(fname);
+
+  if (fsize == -1) {
+    gk_errexit(SIGERR, "Failed to fstat(%s).\n", fname);
+    return NULL;
+  }
+
+  nelmnts = fsize;
+  array = gk_cmalloc(nelmnts, "gk_creadfilebin: array");
+
+  fpin = gk_fopen(fname, "rb", "gk_creadfilebin");
+  if (fread(array, sizeof(char), nelmnts, fpin) != nelmnts) {
+    /* Release the stream and the buffer before reporting, so that neither
+       is leaked regardless of how gk_errexit() hands control back. */
+    gk_fclose(fpin);
+    gk_free((void **)&array, LTERM);
+    gk_errexit(SIGERR, "Failed to read the number of words requested. %zu\n", nelmnts);
+    return NULL;
+  }
+  gk_fclose(fpin);
+
+  if (r_nelmnts != NULL)
+    *r_nelmnts = nelmnts;
+
+  return array;
+}
+
+/*************************************************************************/
+/*! This function writes the contents of a char array into a binary file,
+    replacing any previous contents of the file.
+    \param fname is the name of the file
+    \param n the number of elements in the array.
+    \param a the array to be written out.
+    \returns the number of elements that fwrite() reported as written.
+*/
+/*************************************************************************/
+size_t gk_cwritefilebin(char *fname, size_t n, char *a)
+{
+  size_t fsize;
+  FILE *fp;
+
+  /* the tag names this routine so that open failures can be traced */
+  fp = gk_fopen(fname, "wb", "gk_cwritefilebin");
+
+  fsize = fwrite(a, sizeof(char), n, fp);
+
+  gk_fclose(fp);
+
+  return fsize;
+}
+
+/*************************************************************************/
+/*! This function reads the contents of a binary file and returns it in the
+    form of an array of int32_t.
+    \param fname is the name of the file
+    \param r_nelmnts is the number of elements that were read. It is set 
+           to 0 on failure. If it is NULL, this information is not 
+           returned.
+    \returns the array holding the file's contents. Failures (including a
+             file size that is not a multiple of sizeof(int32_t)) are 
+             reported via gk_errexit(); NULL is returned if it hands 
+             control back.
+*/
+/*************************************************************************/
+int32_t *gk_i32readfilebin(char *fname, size_t *r_nelmnts)
+{
+  size_t nelmnts;
+  ssize_t fsize;
+  int32_t *array=NULL;
+  FILE *fpin;
+
+  if (r_nelmnts != NULL)
+    *r_nelmnts = 0;
+
+  fsize = gk_getfsize(fname);
+
+  if (fsize == -1) {
+    gk_errexit(SIGERR, "Failed to fstat(%s).\n", fname);
+    return NULL;
+  }
+
+  if (fsize%sizeof(int32_t) != 0) {
+    gk_errexit(SIGERR, "The size [%zd] of the file [%s] is not in multiples of sizeof(int32_t).\n", fsize, fname);
+    return NULL;
+  }
+
+  nelmnts = fsize/sizeof(int32_t);
+  array = gk_i32malloc(nelmnts, "gk_i32readfilebin: array");
+
+  fpin = gk_fopen(fname, "rb", "gk_i32readfilebin");
+  
+  if (fread(array, sizeof(int32_t), nelmnts, fpin) != nelmnts) {
+    /* Release the stream and the buffer before reporting, so that neither
+       is leaked regardless of how gk_errexit() hands control back. */
+    gk_fclose(fpin);
+    gk_free((void **)&array, LTERM);
+    gk_errexit(SIGERR, "Failed to read the number of words requested. %zu\n", nelmnts);
+    return NULL;
+  }
+  gk_fclose(fpin);
+
+  if (r_nelmnts != NULL)
+    *r_nelmnts = nelmnts;
+
+  return array;
+}
+
+/*************************************************************************/
+/*! This function writes the contents of an int32_t array into a binary 
+    file, replacing any previous contents of the file.
+    \param fname is the name of the file
+    \param n the number of elements in the array.
+    \param a the array to be written out.
+    \returns the number of elements that fwrite() reported as written.
+*/
+/*************************************************************************/
+size_t gk_i32writefilebin(char *fname, size_t n, int32_t *a)
+{
+  size_t fsize;
+  FILE *fp;
+
+  /* the tag names this routine so that open failures can be traced */
+  fp = gk_fopen(fname, "wb", "gk_i32writefilebin");
+
+  fsize = fwrite(a, sizeof(int32_t), n, fp);
+
+  gk_fclose(fp);
+
+  return fsize;
+}
+
+/*************************************************************************/
+/*! This function reads the contents of a binary file and returns it in the
+    form of an array of int64_t.
+    \param fname is the name of the file
+    \param r_nelmnts is the number of elements that were read. It is set 
+           to 0 on failure. If it is NULL, this information is not 
+           returned.
+    \returns the array holding the file's contents. Failures (including a
+             file size that is not a multiple of sizeof(int64_t)) are 
+             reported via gk_errexit(); NULL is returned if it hands 
+             control back.
+*/
+/*************************************************************************/
+int64_t *gk_i64readfilebin(char *fname, size_t *r_nelmnts)
+{
+  size_t nelmnts;
+  ssize_t fsize;
+  int64_t *array=NULL;
+  FILE *fpin;
+
+  if (r_nelmnts != NULL)
+    *r_nelmnts = 0;
+
+  fsize = gk_getfsize(fname);
+
+  if (fsize == -1) {
+    gk_errexit(SIGERR, "Failed to fstat(%s).\n", fname);
+    return NULL;
+  }
+
+  if (fsize%sizeof(int64_t) != 0) {
+    gk_errexit(SIGERR, "The size of the file is not in multiples of sizeof(int64_t).\n");
+    return NULL;
+  }
+
+  nelmnts = fsize/sizeof(int64_t);
+  array = gk_i64malloc(nelmnts, "gk_i64readfilebin: array");
+
+  fpin = gk_fopen(fname, "rb", "gk_i64readfilebin");
+  
+  if (fread(array, sizeof(int64_t), nelmnts, fpin) != nelmnts) {
+    /* Release the stream and the buffer before reporting, so that neither
+       is leaked regardless of how gk_errexit() hands control back. */
+    gk_fclose(fpin);
+    gk_free((void **)&array, LTERM);
+    gk_errexit(SIGERR, "Failed to read the number of words requested. %zu\n", nelmnts);
+    return NULL;
+  }
+  gk_fclose(fpin);
+
+  if (r_nelmnts != NULL)
+    *r_nelmnts = nelmnts;
+
+  return array;
+}
+
+/*************************************************************************/
+/*! This function writes the contents of an int64_t array into a binary 
+    file, replacing any previous contents of the file.
+    \param fname is the name of the file
+    \param n the number of elements in the array.
+    \param a the array to be written out.
+    \returns the number of elements that fwrite() reported as written.
+*/
+/*************************************************************************/
+size_t gk_i64writefilebin(char *fname, size_t n, int64_t *a)
+{
+  size_t fsize;
+  FILE *fp;
+
+  /* the tag names this routine so that open failures can be traced */
+  fp = gk_fopen(fname, "wb", "gk_i64writefilebin");
+
+  fsize = fwrite(a, sizeof(int64_t), n, fp);
+
+  gk_fclose(fp);
+
+  return fsize;
+}
+
+/*************************************************************************/
+/*! This function reads the contents of a binary file and returns it in the
+    form of an array of ssize_t.
+    \param fname is the name of the file
+    \param r_nelmnts is the number of elements that were read. It is set 
+           to 0 on failure. If it is NULL, this information is not 
+           returned.
+    \returns the array holding the file's contents. Failures (including a
+             file size that is not a multiple of sizeof(ssize_t)) are 
+             reported via gk_errexit(); NULL is returned if it hands 
+             control back.
+*/
+/*************************************************************************/
+ssize_t *gk_zreadfilebin(char *fname, size_t *r_nelmnts)
+{
+  size_t nelmnts;
+  ssize_t fsize;
+  ssize_t *array=NULL;
+  FILE *fpin;
+
+  if (r_nelmnts != NULL)
+    *r_nelmnts = 0;
+
+  fsize = gk_getfsize(fname);
+
+  if (fsize == -1) {
+    gk_errexit(SIGERR, "Failed to fstat(%s).\n", fname);
+    return NULL;
+  }
+
+  if (fsize%sizeof(ssize_t) != 0) {
+    gk_errexit(SIGERR, "The size of the file is not in multiples of sizeof(ssize_t).\n");
+    return NULL;
+  }
+
+  nelmnts = fsize/sizeof(ssize_t);
+  array = gk_zmalloc(nelmnts, "gk_zreadfilebin: array");
+
+  fpin = gk_fopen(fname, "rb", "gk_zreadfilebin");
+  
+  if (fread(array, sizeof(ssize_t), nelmnts, fpin) != nelmnts) {
+    /* Release the stream and the buffer before reporting, so that neither
+       is leaked regardless of how gk_errexit() hands control back. */
+    gk_fclose(fpin);
+    gk_free((void **)&array, LTERM);
+    gk_errexit(SIGERR, "Failed to read the number of words requested. %zu\n", nelmnts);
+    return NULL;
+  }
+  gk_fclose(fpin);
+
+  if (r_nelmnts != NULL)
+    *r_nelmnts = nelmnts;
+
+  return array;
+}
+
+/*************************************************************************/
+/*! This function writes the contents of an ssize_t array into a binary 
+    file, replacing any previous contents of the file.
+    \param fname is the name of the file
+    \param n the number of elements in the array.
+    \param a the array to be written out.
+    \returns the number of elements that fwrite() reported as written.
+*/
+/*************************************************************************/
+size_t gk_zwritefilebin(char *fname, size_t n, ssize_t *a)
+{
+  size_t fsize;
+  FILE *fp;
+
+  /* the tag names this routine so that open failures can be traced */
+  fp = gk_fopen(fname, "wb", "gk_zwritefilebin");
+
+  fsize = fwrite(a, sizeof(ssize_t), n, fp);
+
+  gk_fclose(fp);
+
+  return fsize;
+}
+
+/*************************************************************************/
+/*! This function reads the contents of a binary file and returns it in the
+    form of an array of float.
+    \param fname is the name of the file
+    \param r_nelmnts is the number of elements that were read. It is set 
+           to 0 on failure. If it is NULL, this information is not 
+           returned.
+    \returns the array holding the file's contents. Failures (including a
+             file size that is not a multiple of sizeof(float)) are 
+             reported via gk_errexit(); NULL is returned if it hands 
+             control back.
+*/
+/*************************************************************************/
+float *gk_freadfilebin(char *fname, size_t *r_nelmnts)
+{
+  size_t nelmnts;
+  ssize_t fsize;
+  float *array=NULL;
+  FILE *fpin;
+
+  if (r_nelmnts != NULL)
+    *r_nelmnts = 0;
+
+  fsize = gk_getfsize(fname);
+
+  if (fsize == -1) {
+    gk_errexit(SIGERR, "Failed to fstat(%s).\n", fname);
+    return NULL;
+  }
+
+  if (fsize%sizeof(float) != 0) {
+    gk_errexit(SIGERR, "The size of the file is not in multiples of sizeof(float).\n");
+    return NULL;
+  }
+
+  nelmnts = fsize/sizeof(float);
+  array = gk_fmalloc(nelmnts, "gk_freadfilebin: array");
+
+  fpin = gk_fopen(fname, "rb", "gk_freadfilebin");
+  
+  if (fread(array, sizeof(float), nelmnts, fpin) != nelmnts) {
+    /* Release the stream and the buffer before reporting, so that neither
+       is leaked regardless of how gk_errexit() hands control back. */
+    gk_fclose(fpin);
+    gk_free((void **)&array, LTERM);
+    gk_errexit(SIGERR, "Failed to read the number of words requested. %zu\n", nelmnts);
+    return NULL;
+  }
+  gk_fclose(fpin);
+
+  if (r_nelmnts != NULL)
+    *r_nelmnts = nelmnts;
+
+  return array;
+}
+
+/*************************************************************************/
+/*! This function dumps a float array into a binary file.
+    \param fname is the name of the file; it is opened in "wb" mode.
+    \param n the number of elements in the array.
+    \param a the array to be written out.
+    \returns the element count reported by fwrite().
+*/
+/*************************************************************************/
+size_t gk_fwritefilebin(char *fname, size_t n, float *a)
+{
+  FILE *fpout = gk_fopen(fname, "wb", "gk_fwritefilebin");
+  size_t nwritten = fwrite(a, sizeof(float), n, fpout);
+
+  gk_fclose(fpout);
+
+  return nwritten;
+}
+
+/*************************************************************************/
+/*! This function reads the contents of a binary file and returns it in the
+    form of an array of double.
+    \param fname is the name of the file
+    \param r_nelmnts is the number of elements that were read. It is set 
+           to 0 on failure. If it is NULL, this information is not 
+           returned.
+    \returns the array holding the file's contents. Failures (including a
+             file size that is not a multiple of sizeof(double)) are 
+             reported via gk_errexit(); NULL is returned if it hands 
+             control back.
+*/
+/*************************************************************************/
+double *gk_dreadfilebin(char *fname, size_t *r_nelmnts)
+{
+  size_t nelmnts;
+  ssize_t fsize;
+  double *array=NULL;
+  FILE *fpin;
+
+  if (r_nelmnts != NULL)
+    *r_nelmnts = 0;
+
+  fsize = gk_getfsize(fname);
+
+  if (fsize == -1) {
+    gk_errexit(SIGERR, "Failed to fstat(%s).\n", fname);
+    return NULL;
+  }
+
+  if (fsize%sizeof(double) != 0) {
+    gk_errexit(SIGERR, "The size of the file is not in multiples of sizeof(double).\n");
+    return NULL;
+  }
+
+  nelmnts = fsize/sizeof(double);
+  array = gk_dmalloc(nelmnts, "gk_dreadfilebin: array");
+
+  fpin = gk_fopen(fname, "rb", "gk_dreadfilebin");
+  
+  if (fread(array, sizeof(double), nelmnts, fpin) != nelmnts) {
+    /* Release the stream and the buffer before reporting, so that neither
+       is leaked regardless of how gk_errexit() hands control back. */
+    gk_fclose(fpin);
+    gk_free((void **)&array, LTERM);
+    gk_errexit(SIGERR, "Failed to read the number of words requested. %zu\n", nelmnts);
+    return NULL;
+  }
+  gk_fclose(fpin);
+
+  if (r_nelmnts != NULL)
+    *r_nelmnts = nelmnts;
+
+  return array;
+}
+
+/*************************************************************************/
+/*! This function writes the contents of a double array into a binary 
+    file, replacing any previous contents of the file.
+    \param fname is the name of the file
+    \param n the number of elements in the array.
+    \param a the array to be written out.
+    \returns the number of elements that fwrite() reported as written.
+*/
+/*************************************************************************/
+size_t gk_dwritefilebin(char *fname, size_t n, double *a)
+{
+  size_t fsize;
+  FILE *fp;
+
+  /* the tag names this routine so that open failures can be traced */
+  fp = gk_fopen(fname, "wb", "gk_dwritefilebin");
+
+  fsize = fwrite(a, sizeof(double), n, fp);
+
+  gk_fclose(fp);
+
+  return fsize;
+}
+
diff --git a/itemsets.c b/itemsets.c
new file mode 100644
index 0000000..beb58ae
--- /dev/null
+++ b/itemsets.c
@@ -0,0 +1,210 @@
+/*!
+ * \file
+ * \brief Frequent/Closed itemset discovery routines 
+ *
+ * This file contains the code for finding frequent/closed itemsets. These routines
+ * are implemented using a call-back mechanism to deal with the discovered itemsets.
+ *
+ * \date 6/13/2008
+ * \author George Karypis
+ * \version\verbatim $Id: itemsets.c 19240 2015-10-22 12:41:19Z karypis $ \endverbatim
+ */
+
+#include <GKlib.h>
+
+/*-------------------------------------------------------------*/
+/*! Data structures for use within this module.
+    isparams_t bundles the search limits, the user call-back, and the 
+    scratch arrays that are shared by every level of the recursive 
+    itemset search. */
+/*-------------------------------------------------------------*/
+typedef struct {
+  int minfreq;  /* the minimum frequency of a pattern */
+  int maxfreq;  /* the maximum frequency of a pattern */
+  int minlen;   /* the minimum length of the requested pattern */
+  int maxlen;   /* the maximum length of the requested pattern */
+  int tnitems;  /* the initial range of the item space */
+
+  /* the call-back function; invoked once per reported itemset with the 
+     items of the itemset and the transactions that contain it */
+  void (*callback)(void *stateptr, int nitems, int *itemids, int ntrans, int *transids); 
+  void *stateptr;   /* the user-supplied pointer to pass to the callback */
+
+  /* workspace variables */
+  int *rmarker;     /* per-row 0/1 marker; set while projecting and cleared again afterwards */
+  gk_ikv_t *cand;   /* candidate columns of a projection: key = frequency, val = column index */
+} isparams_t;
+
+
+/*-------------------------------------------------------------*/
+/*! Prototypes for this module. 
+    Both routines are defined further down in this file; they are declared 
+    here because they are called before their definitions. */
+/*-------------------------------------------------------------*/
+void itemsets_find_frequent_itemsets(isparams_t *params, gk_csr_t *mat, 
+         int preflen, int *prefix);
+gk_csr_t *itemsets_project_matrix(isparams_t *param, gk_csr_t *mat, int cid);
+
+
+
+/*************************************************************************/
+/*! The entry point of the frequent itemset discovery code.
+    The transactions are supplied in CSR form and every discovered itemset
+    is handed to the call-back.
+    \param ntrans is the number of transactions.
+    \param tranptr is the array of ntrans+1 offsets into tranind; the items
+           of transaction i are tranind[tranptr[i]..tranptr[i+1]-1].
+    \param tranind stores the item ids of all the transactions.
+    \param minfreq is the minimum frequency of a reported itemset.
+    \param maxfreq is the maximum frequency of a reported itemset; -1 
+           stands for the number of transactions.
+    \param minlen is the minimum length of a reported itemset.
+    \param maxlen is the maximum length of a reported itemset; -1 stands 
+           for the number of items.
+    \param process_itemset is the call-back invoked for each itemset.
+    \param stateptr is passed unchanged to the call-back.
+
+    If there are no transactions or no items at all, the function returns
+    without invoking the call-back.
+*/
+/*************************************************************************/
+void gk_find_frequent_itemsets(int ntrans, ssize_t *tranptr, int *tranind, 
+        int minfreq, int maxfreq, int minlen, int maxlen, 
+        void (*process_itemset)(void *stateptr, int nitems, int *itemids, 
+                                int ntrans, int *transids),
+        void *stateptr)
+{
+  gk_csr_t *mat, *pmat;
+  isparams_t params;
+  int *pattern;
+
+  /* Nothing to mine. This also keeps the tranind[] lookup below from 
+     reading an element of an empty array. */
+  if (ntrans <= 0 || tranptr[ntrans] <= 0)
+    return;
+
+  /* Create the matrix; the number of columns is the largest item id plus one */
+  mat = gk_csr_Create();
+  mat->nrows  = ntrans;
+  mat->ncols  = tranind[gk_iargmax(tranptr[ntrans], tranind, 1)]+1;
+  mat->rowptr = gk_zcopy(ntrans+1, tranptr, gk_zmalloc(ntrans+1, "gk_find_frequent_itemsets: mat.rowptr"));
+  mat->rowind = gk_icopy(tranptr[ntrans], tranind, gk_imalloc(tranptr[ntrans], "gk_find_frequent_itemsets: mat.rowind"));
+  mat->colids = gk_iincset(mat->ncols, 0, gk_imalloc(mat->ncols, "gk_find_frequent_itemsets: mat.colids"));
+
+  /* Setup the parameters */
+  params.minfreq  = minfreq;
+  params.maxfreq  = (maxfreq == -1 ? mat->nrows : maxfreq);
+  params.minlen   = minlen;
+  params.maxlen   = (maxlen == -1 ? mat->ncols : maxlen);
+  params.tnitems  = mat->ncols;
+  params.callback = process_itemset;
+  params.stateptr = stateptr;
+  params.rmarker  = gk_ismalloc(mat->nrows, 0, "gk_find_frequent_itemsets: rmarker");
+  params.cand     = gk_ikvmalloc(mat->ncols, "gk_find_frequent_itemsets: cand");
+
+  /* Perform the initial projection (cid == -1 keeps every row) */
+  gk_csr_CreateIndex(mat, GK_CSR_COL);
+  pmat = itemsets_project_matrix(&params, mat, -1);
+  gk_csr_Free(&mat);
+
+  /* pattern holds the itemset prefix that is built up by the recursion */
+  pattern = gk_imalloc(pmat->ncols, "gk_find_frequent_itemsets: pattern");
+  itemsets_find_frequent_itemsets(&params, pmat, 0, pattern); 
+
+  gk_csr_Free(&pmat);
+  gk_free((void **)&pattern, &params.rmarker, &params.cand, LTERM);
+
+}
+
+
+
+/*************************************************************************/
+/*! The recursive routine for DFS-based frequent pattern discovery.
+    Each column of mat extends the current prefix by one item. The 
+    extended itemset is reported through the call-back when it is at least
+    minlen long, and it is expanded further (by projecting mat on that 
+    column and recursing) while it is shorter than maxlen.
+    \param params holds the search limits, the call-back and the workspace.
+    \param mat is the projected matrix of the current prefix.
+    \param preflen is the number of items already stored in prefix.
+    \param prefix is the array in which the current itemset is assembled.
+*/
+/*************************************************************************/
+void itemsets_find_frequent_itemsets(isparams_t *params, gk_csr_t *mat, 
+         int preflen, int *prefix)
+{
+  ssize_t col;
+  int newlen = preflen+1;
+  gk_csr_t *proj;
+
+  for (col=0; col<mat->ncols; col++) {
+    /* extend the prefix with the item of this column */
+    prefix[preflen] = mat->colids[col];
+
+    /* report it if it is long enough */
+    if (newlen >= params->minlen) {
+      (*params->callback)(params->stateptr, newlen, prefix, 
+           mat->colptr[col+1]-mat->colptr[col], mat->colind+mat->colptr[col]);
+    }
+
+    /* no room for longer itemsets */
+    if (newlen >= params->maxlen)
+      continue;
+
+    proj = itemsets_project_matrix(params, mat, col);
+    itemsets_find_frequent_itemsets(params, proj, newlen, prefix);
+    gk_csr_Free(&proj);
+  }
+
+}
+
+
+/******************************************************************************/
+/*! This function projects a matrix with respect to a particular column. 
+    It performs the following steps:
+    - Determines the length of each column that is remaining.
+    - Sorts the columns in increasing length.
+    - Creates a column-based version of the matrix with the proper
+      column ordering.
+
+    \param params supplies the minfreq/maxfreq limits and the rmarker/cand
+           workspace arrays. rmarker is returned to its all-zero state 
+           before the function exits.
+    \param mat is the matrix to project; only its column-based structure
+           (colptr, colind, colids) is accessed.
+    \param cid is the column to project on, or -1 for the initial 
+           projection that keeps every row.
+    \returns a newly created matrix holding only colids/colptr/colind for
+             the columns after cid whose frequency over the kept rows lies
+             in [minfreq, maxfreq].
+ */
+/*******************************************************************************/
+gk_csr_t *itemsets_project_matrix(isparams_t *params, gk_csr_t *mat, int cid)
+{
+  ssize_t i, j, k, ii, pnnz;
+  int nrows, ncols, pnrows, pncols;
+  ssize_t *colptr, *pcolptr;
+  int *colind, *colids, *pcolind, *pcolids, *rmarker;
+  gk_csr_t *pmat;
+  gk_ikv_t *cand;
+
+  nrows  = mat->nrows;
+  ncols  = mat->ncols;
+  colptr = mat->colptr;
+  colind = mat->colind;
+  colids = mat->colids;
+
+  rmarker = params->rmarker;
+  cand    = params->cand;
+
+
+  /* Allocate space for the projected matrix based on what you know thus far.
+     Its row count is the number of entries in column cid (or all the rows 
+     for the initial projection). */
+  pmat = gk_csr_Create();
+  pmat->nrows  = pnrows = (cid == -1 ? nrows : colptr[cid+1]-colptr[cid]);
+
+
+  /* Mark the rows that will be kept and determine the prowids */
+  if (cid == -1) { /* Initial projection */
+    gk_iset(nrows, 1, rmarker);
+  }
+  else { /* The other projections */
+    for (i=colptr[cid]; i<colptr[cid+1]; i++) 
+      rmarker[colind[i]] = 1;
+  }
+
+
+  /* Determine the length of each column that will be left in the projected matrix.
+     Only the columns after cid are considered; k counts the marked rows of
+     column i, and the column becomes a candidate if k is within the limits. */
+  for (pncols=0, pnnz=0, i=cid+1; i<ncols; i++) {
+    for (k=0, j=colptr[i]; j<colptr[i+1]; j++) {
+      k += rmarker[colind[j]];
+    }
+    if (k >= params->minfreq && k <= params->maxfreq) {
+      cand[pncols].val   = i;
+      cand[pncols++].key = k;
+      pnnz += k;
+    }
+  }
+
+  /* Sort the columns in increasing order */
+  gk_ikvsorti(pncols, cand);
+
+
+  /* Allocate space for the remaining fields of the projected matrix */
+  pmat->ncols  = pncols;
+  pmat->colids = pcolids = gk_imalloc(pncols, "itemsets_project_matrix: pcolids");
+  pmat->colptr = pcolptr = gk_zmalloc(pncols+1, "itemsets_project_matrix: pcolptr");
+  pmat->colind = pcolind = gk_imalloc(pnnz, "itemsets_project_matrix: pcolind");
+
+
+  /* Populate the projected matrix. The candidates are visited in sorted 
+     order; the row indices of the marked rows are copied unchanged and the
+     item id of each column is carried over from mat. */
+  pcolptr[0] = 0;
+  for (pnnz=0, ii=0; ii<pncols; ii++) {
+    i = cand[ii].val;
+    for (j=colptr[i]; j<colptr[i+1]; j++) {
+      if (rmarker[colind[j]]) 
+        pcolind[pnnz++] = colind[j];
+    }
+
+    pcolids[ii] = colids[i];
+    pcolptr[ii+1] = pnnz;
+  }
+
+
+  /* Reset the rmarker array so that the next projection starts from all zeros */
+  if (cid == -1) { /* Initial projection */
+    gk_iset(nrows, 0, rmarker);
+  }
+  else { /* The other projections */
+    for (i=colptr[cid]; i<colptr[cid+1]; i++) 
+      rmarker[colind[i]] = 0;
+  }
+
+
+  return pmat;
+}
diff --git a/mcore.c b/mcore.c
new file mode 100644
index 0000000..6442e03
--- /dev/null
+++ b/mcore.c
@@ -0,0 +1,393 @@
+/*!
+\file 
+\brief Functions dealing with creating and allocating mcores
+
+\date Started 5/30/11
+\author George
+\author Copyright 1997-2011, Regents of the University of Minnesota 
+\version $Id: mcore.c 13953 2013-03-30 16:20:07Z karypis $
+*/
+
+#include <GKlib.h>
+
+
+/*************************************************************************/
+/*! Allocates an mcore descriptor, its (optional) core buffer, and the
+    array that records the memory operations (mops).
+
+    \param coresize is the size in bytes of the core buffer. When it is 0
+           no core buffer is allocated and the core pointer is NULL.
+    \returns the newly created mcore.
+ */
+/*************************************************************************/
+gk_mcore_t *gk_mcoreCreate(size_t coresize)
+{
+  gk_mcore_t *mc;
+
+  mc = (gk_mcore_t *)gk_malloc(sizeof(*mc), "gk_mcoreCreate: mcore");
+  memset(mc, 0, sizeof(*mc));
+
+  /* set up the core; it is skipped entirely for a zero-sized request */
+  mc->coresize = coresize;
+  mc->corecpos = 0;
+  if (coresize == 0)
+    mc->core = NULL;
+  else
+    mc->core = gk_malloc(coresize, "gk_mcoreCreate: core");
+
+  /* set up the mop array with an initial capacity of 2048 entries */
+  mc->cmop  = 0;
+  mc->nmops = 2048;
+  mc->mops  = (gk_mop_t *)gk_malloc(mc->nmops*sizeof(gk_mop_t), "gk_mcoreCreate: mcore->mops");
+
+  return mc;
+}
+
+
+/*************************************************************************/
+/*! Creates an mcore for use as gkmcore. Unlike gk_mcoreCreate(), it uses
+    the C library allocator directly, allocates no core buffer, and
+    reports failure by returning NULL.
+
+    \returns the new zero-initialized mcore, or NULL if either allocation
+             failed.
+ */
+/*************************************************************************/
+gk_mcore_t *gk_gkmcoreCreate()
+{
+  gk_mcore_t *mc = (gk_mcore_t *)calloc(1, sizeof(gk_mcore_t));
+
+  if (mc == NULL)
+    return NULL;
+
+  /* the mop array starts with room for 2048 entries */
+  mc->cmop  = 0;
+  mc->nmops = 2048;
+  mc->mops  = (gk_mop_t *)malloc(mc->nmops*sizeof(gk_mop_t));
+  if (mc->mops == NULL) {
+    free(mc);
+    return NULL;
+  }
+
+  return mc;
+}
+
+
+/*************************************************************************/
+/*! Releases an mcore together with its core buffer and mop array.
+
+    \param r_mcore points to the mcore pointer; it is set to NULL on return.
+           Nothing is done if it already refers to a NULL mcore.
+    \param showstats if non-zero, the allocation statistics are printed
+           to stdout before the mcore is freed.
+
+    A warning is printed if the mcore still has outstanding core/heap
+    allocations or un-popped mops.
+ */
+/*************************************************************************/
+void gk_mcoreDestroy(gk_mcore_t **r_mcore, int showstats)
+{
+  gk_mcore_t *mc = *r_mcore;
+
+  if (mc == NULL)
+    return;
+
+  if (showstats) {
+    printf("\n gk_mcore statistics\n" 
+           "           coresize: %12zu         nmops: %12zu  cmop: %6zu\n"
+           "        num_callocs: %12zu   num_hallocs: %12zu\n"
+           "       size_callocs: %12zu  size_hallocs: %12zu\n"
+           "        cur_callocs: %12zu   cur_hallocs: %12zu\n"
+           "        max_callocs: %12zu   max_hallocs: %12zu\n",
+           mc->coresize, mc->nmops, mc->cmop,
+           mc->num_callocs,  mc->num_hallocs,
+           mc->size_callocs, mc->size_hallocs,
+           mc->cur_callocs,  mc->cur_hallocs,
+           mc->max_callocs,  mc->max_hallocs);
+  }
+
+  /* anything still outstanding indicates a missing pop/free */
+  if (!(mc->cur_callocs == 0 && mc->cur_hallocs == 0 && mc->cmop == 0)) {
+    printf("***Warning: mcore memory was not fully freed when destroyed.\n"
+           " cur_callocs: %6zu  cur_hallocs: %6zu cmop: %6zu\n",
+           mc->cur_callocs,  mc->cur_hallocs, mc->cmop);
+  }
+
+  gk_free((void **)&mc->core, &mc->mops, &mc, LTERM);
+
+  *r_mcore = NULL;
+}
+
+
+/*************************************************************************/
+/*! Releases an mcore that is used as gkmcore. It mirrors
+    gk_mcoreDestroy() but releases the memory with free() and reports
+    only the heap statistics.
+
+    \param r_mcore points to the mcore pointer; it is set to NULL on return.
+           Nothing is done if it already refers to a NULL mcore.
+    \param showstats if non-zero, the allocation statistics are printed
+           to stdout before the mcore is freed.
+ */
+/*************************************************************************/
+void gk_gkmcoreDestroy(gk_mcore_t **r_mcore, int showstats)
+{
+  gk_mcore_t *mc = *r_mcore;
+
+  if (mc == NULL)
+    return;
+
+  if (showstats) {
+    printf("\n gk_mcore statistics\n" 
+           "         nmops: %12zu  cmop: %6zu\n"
+           "   num_hallocs: %12zu\n"
+           "  size_hallocs: %12zu\n"
+           "   cur_hallocs: %12zu\n"
+           "   max_hallocs: %12zu\n",
+           mc->nmops, mc->cmop,
+           mc->num_hallocs,
+           mc->size_hallocs,
+           mc->cur_hallocs,
+           mc->max_hallocs);
+  }
+
+  /* anything still outstanding indicates a missing pop/free */
+  if (!(mc->cur_hallocs == 0 && mc->cmop == 0)) {
+    printf("***Warning: mcore memory was not fully freed when destroyed.\n"
+           " cur_hallocs: %6zu cmop: %6zu\n",
+           mc->cur_hallocs, mc->cmop);
+  }
+
+  free(mc->mops);
+  free(mc);
+
+  *r_mcore = NULL;
+}
+
+
+/*************************************************************************/
+/*! Allocates memory from the mcore, using the core buffer when the
+    request fits and the heap otherwise. The operation is recorded in the
+    mop array in either case.
+
+    \param mcore is the mcore to allocate from.
+    \param nbytes is the requested size; it is rounded up to a multiple
+           of 8 bytes.
+    \returns a pointer to the allocated memory.
+ */
+/*************************************************************************/
+void *gk_mcoreMalloc(gk_mcore_t *mcore, size_t nbytes)
+{
+  void *ptr;
+  size_t rem = nbytes%8;
+
+  /* round the request up to the next multiple of 8 */
+  if (rem != 0)
+    nbytes += 8 - rem;
+
+  if (mcore->corecpos + nbytes >= mcore->coresize) {
+    /* the request does not fit in the core; get it from the heap */
+    ptr = gk_malloc(nbytes, "gk_mcoremalloc: ptr");
+    gk_mcoreAdd(mcore, GK_MOPT_HEAP, nbytes, ptr);
+    return ptr;
+  }
+
+  /* carve the request out of the core */
+  ptr = ((char *)mcore->core)+mcore->corecpos;
+  mcore->corecpos += nbytes;
+  gk_mcoreAdd(mcore, GK_MOPT_CORE, nbytes, ptr);
+
+  return ptr;
+}
+
+
+/*************************************************************************/
+/*! Records a marker mop (GK_MOPT_MARK, zero bytes, NULL pointer) at the
+    top of the mcore's mop array. gk_mcorePop() stops unwinding when it
+    reaches such a marker.
+ */
+/*************************************************************************/
+void gk_mcorePush(gk_mcore_t *mcore)
+{
+  gk_mcoreAdd(mcore, GK_MOPT_MARK, 0, NULL);
+}
+
+
+/*************************************************************************/
+/*! Records a marker mop (GK_MOPT_MARK, zero bytes, NULL pointer) at the
+    top of the mop array of an mcore that is used as gkmcore.
+    gk_gkmcorePop() stops unwinding when it reaches such a marker.
+ */
+/*************************************************************************/
+void gk_gkmcorePush(gk_mcore_t *mcore)
+{
+  gk_gkmcoreAdd(mcore, GK_MOPT_MARK, 0, NULL);
+}
+
+
+/*************************************************************************/
+/*! Unwinds the mop array from the top, undoing every core/heap
+    allocation until a marker mop is removed or the array becomes empty.
+    Core allocations are returned to the core and heap allocations are
+    freed with gk_free().
+ */
+/*************************************************************************/
+void gk_mcorePop(gk_mcore_t *mcore)
+{
+  gk_mop_t *mop;
+
+  while (mcore->cmop > 0) {
+    mop = &mcore->mops[--mcore->cmop];
+
+    /* the marker itself is consumed and ends the unwinding */
+    if (mop->type == GK_MOPT_MARK)
+      return;
+
+    switch (mop->type) {
+      case GK_MOPT_CORE: /* core free */
+        if (mcore->corecpos < mop->nbytes)
+          errexit("Internal Error: wspace's core is about to be over-freed [%zu, %zu, %zd]\n",
+              mcore->coresize, mcore->corecpos, mop->nbytes);
+
+        mcore->corecpos    -= mop->nbytes;
+        mcore->cur_callocs -= mop->nbytes;
+        break;
+
+      case GK_MOPT_HEAP: /* heap free */
+        gk_free((void **)&mop->ptr, LTERM);
+        mcore->cur_hallocs -= mop->nbytes;
+        break;
+
+      default:
+        gk_errexit(SIGMEM, "Unknown mop type of %d\n", mop->type);
+    }
+  }
+}
+
+
+/*************************************************************************/
+/*! Unwinds the mop array of an mcore that is used as gkmcore, freeing
+    every heap allocation with free() (not gk_free()) until a marker mop
+    is removed or the array becomes empty.
+ */
+/*************************************************************************/
+void gk_gkmcorePop(gk_mcore_t *mcore)
+{
+  gk_mop_t *mop;
+
+  while (mcore->cmop > 0) {
+    mop = &mcore->mops[--mcore->cmop];
+
+    /* the marker itself is consumed and ends the unwinding */
+    if (mop->type == GK_MOPT_MARK)
+      return;
+
+    if (mop->type == GK_MOPT_HEAP) {
+      free(mop->ptr);
+      mcore->cur_hallocs -= mop->nbytes;
+    }
+    else {
+      gk_errexit(SIGMEM, "Unknown mop type of %d\n", mop->type);
+    }
+  }
+}
+
+
+/*************************************************************************/
+/*! Records a memory operation at the end of the mcore's mop array and
+    updates the corresponding allocation statistics. The array is doubled
+    in size when it is full.
+
+    \param mcore is the mcore whose mop array is updated.
+    \param type is one of GK_MOPT_MARK, GK_MOPT_CORE, or GK_MOPT_HEAP; any
+           other value triggers gk_errexit().
+    \param nbytes is the size of the recorded allocation.
+    \param ptr is the address of the recorded allocation.
+ */
+/*************************************************************************/
+void gk_mcoreAdd(gk_mcore_t *mcore, int type, size_t nbytes, void *ptr)
+{
+  if (mcore->cmop == mcore->nmops) {
+    /* The mop array is obtained with gk_malloc() in gk_mcoreCreate() and
+       released with gk_free() in gk_mcoreDestroy(), so it must be resized
+       with gk_realloc(). A raw realloc() can move the array without
+       updating the heap tracker, which is then left holding a stale
+       pointer. */
+    mcore->nmops *= 2;
+    mcore->mops = (gk_mop_t *)gk_realloc(mcore->mops, mcore->nmops*sizeof(gk_mop_t), 
+                      "gk_mcoreAdd: mcore->mops");
+    if (mcore->mops == NULL) 
+      gk_errexit(SIGMEM, "***Memory allocation for gkmcore failed.\n");
+  }
+
+  mcore->mops[mcore->cmop].type   = type;
+  mcore->mops[mcore->cmop].nbytes = nbytes;
+  mcore->mops[mcore->cmop].ptr    = ptr;
+  mcore->cmop++;
+
+  switch (type) {
+    case GK_MOPT_MARK:
+      /* markers carry no memory, so there are no statistics to update */
+      break;
+
+    case GK_MOPT_CORE:
+      mcore->num_callocs++;
+      mcore->size_callocs += nbytes;
+      mcore->cur_callocs  += nbytes;
+      if (mcore->max_callocs < mcore->cur_callocs)
+        mcore->max_callocs = mcore->cur_callocs;
+      break;
+
+    case GK_MOPT_HEAP:
+      mcore->num_hallocs++;
+      mcore->size_hallocs += nbytes;
+      mcore->cur_hallocs  += nbytes;
+      if (mcore->max_hallocs < mcore->cur_hallocs)
+        mcore->max_hallocs = mcore->cur_hallocs;
+      break;
+    default:
+      gk_errexit(SIGMEM, "Incorrect mcore type operation.\n");
+  }
+}
+
+
+/*************************************************************************/
+/*! Records a memory operation at the end of the mop array of an mcore
+    that is used as gkmcore, and updates the heap statistics. The array
+    is doubled in size (with realloc()) when it is full.
+
+    \param mcore is the mcore whose mop array is updated.
+    \param type is either GK_MOPT_MARK or GK_MOPT_HEAP; any other value
+           triggers gk_errexit().
+    \param nbytes is the size of the recorded allocation.
+    \param ptr is the address of the recorded allocation.
+ */
+/*************************************************************************/
+void gk_gkmcoreAdd(gk_mcore_t *mcore, int type, size_t nbytes, void *ptr)
+{
+  gk_mop_t *mop;
+
+  /* grow the mop array if there is no free slot */
+  if (mcore->cmop == mcore->nmops) {
+    mcore->nmops *= 2;
+    mcore->mops = realloc(mcore->mops, mcore->nmops*sizeof(gk_mop_t));
+    if (mcore->mops == NULL) 
+      gk_errexit(SIGMEM, "***Memory allocation for gkmcore failed.\n");
+  }
+
+  mop = &mcore->mops[mcore->cmop++];
+  mop->type   = type;
+  mop->nbytes = nbytes;
+  mop->ptr    = ptr;
+
+  if (type == GK_MOPT_HEAP) {
+    mcore->num_hallocs++;
+    mcore->size_hallocs += nbytes;
+    mcore->cur_hallocs  += nbytes;
+    if (mcore->cur_hallocs > mcore->max_hallocs)
+      mcore->max_hallocs = mcore->cur_hallocs;
+  }
+  else if (type != GK_MOPT_MARK) {
+    gk_errexit(SIGMEM, "Incorrect mcore type operation.\n");
+  }
+}
+
+
+/*************************************************************************/
+/*! Removes from the mop array the entry that records the supplied
+    pointer. The search goes from the most recent mop backwards and does
+    not cross a marker. The entry must be a heap allocation; the memory
+    itself is not freed here.
+
+    \param mcore is the mcore whose mop array is searched.
+    \param ptr is the address whose mop is to be removed.
+
+    gk_errexit() is called if a marker is reached first, if the matching
+    mop is not a heap mop, or if the pointer is not found at all.
+ */
+/*************************************************************************/
+void gk_mcoreDel(gk_mcore_t *mcore, void *ptr)
+{
+  int i;
+  gk_mop_t *mop;
+
+  i = mcore->cmop-1;
+  while (i >= 0) {
+    mop = mcore->mops + i;
+
+    if (mop->type == GK_MOPT_MARK)
+      gk_errexit(SIGMEM, "Could not find pointer %p in mcore\n", ptr);
+
+    if (mop->ptr == ptr) {
+      if (mop->type != GK_MOPT_HEAP)
+        gk_errexit(SIGMEM, "Trying to delete a non-HEAP mop.\n");
+
+      mcore->cur_hallocs -= mop->nbytes;
+
+      /* fill the hole with the last mop and shrink the array by one */
+      mcore->cmop--;
+      *mop = mcore->mops[mcore->cmop];
+      return;
+    }
+
+    i--;
+  }
+
+  gk_errexit(SIGMEM, "mcoreDel should never have been here!\n");
+}
+
+
+/*************************************************************************/
+/*! Removes from the mop array of an mcore that is used as gkmcore the
+    entry that records the supplied pointer. The search goes from the
+    most recent mop backwards and does not cross a marker. The entry must
+    be a heap allocation; the memory itself is not freed here.
+
+    \param mcore is the mcore whose mop array is searched.
+    \param ptr is the address whose mop is to be removed.
+
+    gk_errexit() is called if a marker is reached first, if the matching
+    mop is not a heap mop, or if the pointer is not found at all.
+ */
+/*************************************************************************/
+void gk_gkmcoreDel(gk_mcore_t *mcore, void *ptr)
+{
+  int i;
+  gk_mop_t *mop;
+
+  i = mcore->cmop-1;
+  while (i >= 0) {
+    mop = mcore->mops + i;
+
+    if (mop->type == GK_MOPT_MARK)
+      gk_errexit(SIGMEM, "Could not find pointer %p in mcore\n", ptr);
+
+    if (mop->ptr == ptr) {
+      if (mop->type != GK_MOPT_HEAP)
+        gk_errexit(SIGMEM, "Trying to delete a non-HEAP mop.\n");
+
+      mcore->cur_hallocs -= mop->nbytes;
+
+      /* fill the hole with the last mop and shrink the array by one */
+      mcore->cmop--;
+      *mop = mcore->mops[mcore->cmop];
+      return;
+    }
+
+    i--;
+  }
+
+  gk_errexit(SIGMEM, "gkmcoreDel should never have been here!\n");
+}
+
diff --git a/memory.c b/memory.c
new file mode 100644
index 0000000..e6dc99c
--- /dev/null
+++ b/memory.c
@@ -0,0 +1,307 @@
+/*!
+\file  memory.c
+\brief This file contains various allocation routines 
+
+The allocation routines included are for 1D and 2D arrays of most of 
+the datatypes that GKlib supports. Many of these routines are 
+defined with the help of the macros in gk_memory.h. These macros 
+can be used to define other memory allocation routines.
+
+\date   Started 4/3/2007
+\author George
+\version\verbatim $Id: memory.c 21050 2017-05-25 03:53:58Z karypis $ \endverbatim
+*/
+
+
+#include <GKlib.h>
+
+/* This is for the global mcore that tracks all heap allocations.
+   It is thread-local (__thread) and stays NULL until gk_malloc_init()
+   creates it; while it is NULL, gk_malloc/gk_realloc/gk_free do not
+   record anything. */
+static __thread gk_mcore_t *gkmcore = NULL;
+
+
+/*************************************************************************/
+/*! Define the set of memory allocation routines for each data type.
+    Each GK_MKALLOC(prefix, type) line instantiates the routines for one
+    element type. NOTE(review): the macro is defined outside this file;
+    presumably it generates the prefix##malloc, prefix##smalloc,
+    prefix##set, etc. routines used throughout GKlib -- confirm against
+    its definition. */
+/**************************************************************************/
+/* scalar element types */
+GK_MKALLOC(gk_c,    char)
+GK_MKALLOC(gk_i,    int)
+GK_MKALLOC(gk_i8,   int8_t)
+GK_MKALLOC(gk_i16,  int16_t)
+GK_MKALLOC(gk_i32,  int32_t)
+GK_MKALLOC(gk_i64,  int64_t)
+GK_MKALLOC(gk_ui8,  uint8_t)
+GK_MKALLOC(gk_ui16, uint16_t)
+GK_MKALLOC(gk_ui32, uint32_t)
+GK_MKALLOC(gk_ui64, uint64_t)
+GK_MKALLOC(gk_z,    ssize_t)
+GK_MKALLOC(gk_zu,   size_t)
+GK_MKALLOC(gk_f,    float)
+GK_MKALLOC(gk_d,    double)
+GK_MKALLOC(gk_idx,  gk_idx_t)
+
+/* key-value pair element types */
+GK_MKALLOC(gk_ckv,   gk_ckv_t)
+GK_MKALLOC(gk_ikv,   gk_ikv_t)
+GK_MKALLOC(gk_i8kv,  gk_i8kv_t)
+GK_MKALLOC(gk_i16kv, gk_i16kv_t)
+GK_MKALLOC(gk_i32kv, gk_i32kv_t)
+GK_MKALLOC(gk_i64kv, gk_i64kv_t)
+GK_MKALLOC(gk_zkv,   gk_zkv_t)
+GK_MKALLOC(gk_zukv,  gk_zukv_t)
+GK_MKALLOC(gk_fkv,   gk_fkv_t)
+GK_MKALLOC(gk_dkv,   gk_dkv_t)
+GK_MKALLOC(gk_skv,   gk_skv_t)
+GK_MKALLOC(gk_idxkv, gk_idxkv_t)
+
+
+
+
+
+
+/*************************************************************************/
+/*! This function allocates a two-dimensional matrix as an array of
+    ndim1 row pointers, each pointing to a separately allocated row of
+    ndim2 elements.
+
+    \param r_matrix on return stores the allocated matrix, or NULL if any
+           of the allocations failed.
+    \param elmlen is the size in bytes of each matrix element.
+    \param ndim1 is the number of rows.
+    \param ndim2 is the number of columns.
+  */
+/*************************************************************************/
+void gk_AllocMatrix(void ***r_matrix, size_t elmlen, size_t ndim1, size_t ndim2)
+{
+  size_t i, j;
+  void **matrix;
+
+  *r_matrix = NULL;
+
+  if ((matrix = (void **)gk_malloc(ndim1*sizeof(void *), "gk_AllocMatrix: matrix")) == NULL)
+    return;
+
+  for (i=0; i<ndim1; i++) {
+    if ((matrix[i] = (void *)gk_malloc(ndim2*elmlen, "gk_AllocMatrix: matrix[i]")) == NULL) {
+      /* undo the partial allocation: first the rows obtained so far and
+         then the array of row pointers itself, so that nothing leaks */
+      for (j=0; j<i; j++) 
+        gk_free((void **)&matrix[j], LTERM);
+      gk_free((void **)&matrix, LTERM);
+      return;
+    }
+  }
+
+  *r_matrix = matrix;
+}
+
+
+/*************************************************************************/
+/*! This function frees a two-dimensional matrix that is stored as an
+    array of separately allocated rows.
+
+    \param r_matrix points to the matrix; it is set to NULL on return.
+           Nothing is done if the matrix is already NULL.
+    \param ndim1 is the number of rows of the matrix.
+    \param ndim2 is the number of columns; it is not used.
+  */
+/*************************************************************************/
+void gk_FreeMatrix(void ***r_matrix, size_t ndim1, size_t ndim2)
+{
+  void **rows = *r_matrix;
+  size_t r;
+
+  if (rows == NULL)
+    return;
+
+  /* release the rows first and then the array of row pointers */
+  r = 0;
+  while (r < ndim1) {
+    gk_free((void **)&rows[r], LTERM);
+    r++;
+  }
+
+  gk_free((void **)r_matrix, LTERM); 
+}
+
+
+/*************************************************************************/
+/*! This function initializes tracking of heap allocations. It creates
+    the global mcore if it does not exist yet and pushes a marker on it,
+    so calls can be nested and matched by gk_malloc_cleanup().
+
+    \returns 1 on success and 0 if the global mcore could not be created.
+*/
+/*************************************************************************/
+int gk_malloc_init()
+{
+  if (gkmcore == NULL && (gkmcore = gk_gkmcoreCreate()) == NULL)
+    return 0;
+
+  gk_gkmcorePush(gkmcore);
+
+  return 1;
+}
+
+
+/*************************************************************************/
+/*! This function frees the memory that has been allocated since the
+    last call to gk_malloc_init(). The global mcore itself is destroyed
+    once its mop array becomes empty.
+
+    \param showstats is passed to gk_gkmcoreDestroy() and controls
+           whether the allocation statistics are printed.
+*/
+/*************************************************************************/
+void gk_malloc_cleanup(int showstats)
+{
+  if (gkmcore == NULL)
+    return;
+
+  gk_gkmcorePop(gkmcore);
+
+  /* mops recorded before an earlier marker are still outstanding */
+  if (gkmcore->cmop != 0)
+    return;
+
+  gk_gkmcoreDestroy(&gkmcore, showstats);
+  gkmcore = NULL;
+}
+
+
+/*************************************************************************/
+/*! This function is my wrapper around malloc that provides the following
+    enhancements over malloc:
+    * It always allocates at least one byte of memory, even if 0 bytes are
+      requested. This is to ensure that checks of returned values do not
+      lead to NULL due to 0 bytes requested.
+    * On failure it prints the current/maximum tracked memory usage to
+      stderr and calls gk_errexit(SIGMEM, ...).
+    * If heap tracking is active (see gk_malloc_init()), the allocation is
+      recorded in the global mcore.
+
+    Note that the returned memory is NOT zeroed-out; it comes straight
+    from malloc() and its contents are indeterminate.
+
+    \param nbytes is the number of bytes to allocate.
+    \param msg is a description of the allocation that is included in the
+           error message if the allocation fails.
+    \returns a pointer to the allocated memory, or NULL if the allocation
+             failed and gk_errexit() returned.
+*/
+/**************************************************************************/
+void *gk_malloc(size_t nbytes, char *msg)
+{
+  void *ptr=NULL;
+
+  if (nbytes == 0)
+    nbytes++;  /* Force mallocs to actually allocate some memory */
+
+  ptr = malloc(nbytes);
+
+  if (ptr == NULL) {
+    fprintf(stderr, "   Current memory used:  %10zu bytes\n", gk_GetCurMemoryUsed());
+    fprintf(stderr, "   Maximum memory used:  %10zu bytes\n", gk_GetMaxMemoryUsed());
+    gk_errexit(SIGMEM, "***Memory allocation failed for %s. Requested size: %zu bytes", 
+        msg, nbytes);
+    return NULL;
+  }
+
+  /* add this memory allocation */
+  if (gkmcore != NULL) gk_gkmcoreAdd(gkmcore, GK_MOPT_HEAP, nbytes, ptr);
+
+  return ptr;
+}
+
+
+/*************************************************************************
+* This function is my wrapper around realloc. A request for 0 bytes is
+* turned into a request for 1 byte. When heap tracking is active, the
+* record of the old pointer is removed before the call to realloc and
+* the new pointer is recorded afterwards. On failure the tracked memory
+* usage is printed to stderr and gk_errexit(SIGMEM, ...) is called.
+**************************************************************************/
+void *gk_realloc(void *oldptr, size_t nbytes, char *msg)
+{
+  void *newptr;
+  size_t request = (nbytes == 0 ? 1 : nbytes);
+
+  /* the old pointer is no longer valid once realloc succeeds */
+  if (oldptr != NULL && gkmcore != NULL)
+    gk_gkmcoreDel(gkmcore, oldptr);
+
+  newptr = realloc(oldptr, request);
+
+  if (newptr == NULL) {
+    fprintf(stderr, "   Maximum memory used: %10zu bytes\n", gk_GetMaxMemoryUsed());
+    fprintf(stderr, "   Current memory used: %10zu bytes\n", gk_GetCurMemoryUsed());
+    gk_errexit(SIGMEM, "***Memory realloc failed for %s. " "Requested size: %zu bytes", 
+        msg, request);
+    return NULL;
+  }
+
+  /* record the new allocation */
+  if (gkmcore != NULL)
+    gk_gkmcoreAdd(gkmcore, GK_MOPT_HEAP, request, newptr);
+
+  return newptr;
+}
+
+
+/*************************************************************************
+* This function is my wrapper around free, allows multiple pointers.
+* Every argument is the address of a pointer and the list must end with
+* LTERM. Each non-NULL pointer is freed, its record is removed from the
+* global mcore when heap tracking is active, and it is then set to NULL.
+**************************************************************************/
+void gk_free(void **ptr1,...)
+{
+  va_list args;
+  void **slot = ptr1;
+
+  va_start(args, ptr1);
+  do {
+    if (*slot != NULL) {
+      free(*slot);
+
+      /* drop the tracking record of this allocation */
+      if (gkmcore != NULL) 
+        gk_gkmcoreDel(gkmcore, *slot);
+    }
+    *slot = NULL;
+  } while ((slot = va_arg(args, void **)) != LTERM);
+  va_end(args);
+}          
+
+
+/*************************************************************************
+* This function returns the current amount of heap memory that is
+* recorded in the global mcore, or 0 if heap tracking is not active.
+**************************************************************************/
+size_t gk_GetCurMemoryUsed()
+{
+  return (gkmcore == NULL ? 0 : gkmcore->cur_hallocs);
+}
+
+
+/*************************************************************************
+* This function returns the maximum amount of heap memory that was
+* recorded in the global mcore, or 0 if heap tracking is not active.
+**************************************************************************/
+size_t gk_GetMaxMemoryUsed()
+{
+  return (gkmcore == NULL ? 0 : gkmcore->max_hallocs);
+}
+
+
+/*************************************************************************/
+/*! This function returns the VmSize and VmRSS of the calling process by
+    reading the first two fields of /proc/<pid>/statm.
+
+    \param vmsize on return stores the first field (total program size).
+    \param vmrss on return stores the second field (resident set size).
+
+    The values are returned exactly as read from the file; they are not
+    scaled by the page size. NOTE(review): per proc(5) the statm fields
+    are measured in pages -- multiply by sysconf(_SC_PAGESIZE) if bytes
+    are needed.
+
+    errexit() is called if the two values cannot be read.
+*/
+/*************************************************************************/
+void gk_GetVMInfo(size_t *vmsize, size_t *vmrss)
+{
+  FILE *fp;
+  char fname[1024];
+
+  /* bounded formatting; the pid is cast to match the %d conversion */
+  snprintf(fname, sizeof(fname), "/proc/%d/statm", (int)getpid());
+  fp = gk_fopen(fname, "r", "proc/pid/statm");
+  if (fscanf(fp, "%zu %zu", vmsize, vmrss) != 2)
+    errexit("Failed to read two values from %s\n", fname);
+  gk_fclose(fp);
+
+  return;
+}
+
+
+/*************************************************************************/
+/*! This function returns the peak virtual memory of the calling process
+    by reading the VmPeak field in /proc/self/status. The value that
+    follows the field name is multiplied by 1024.
+
+    \returns the scaled VmPeak value, or 0 if the file does not exist or
+             does not contain a VmPeak line.
+*/
+/*************************************************************************/
+size_t gk_GetProcVmPeak()
+{
+  FILE *fp;
+  char buf[128];
+  size_t peak=0;
+
+  if (!gk_fexists("/proc/self/status"))
+    return peak;
+
+  fp = gk_fopen("/proc/self/status", "r", "proc/self/status");
+  while (fgets(buf, sizeof(buf), fp) != NULL) {
+    if (strncmp(buf, "VmPeak:", 7) != 0)
+      continue;
+
+    /* the number starts after the "VmPeak:" tag */
+    peak = atoll(buf+8)*1024;
+    break;
+  }
+  gk_fclose(fp);
+
+  return peak;
+}
diff --git a/pqueue.c b/pqueue.c
new file mode 100644
index 0000000..2fb8515
--- /dev/null
+++ b/pqueue.c
@@ -0,0 +1,25 @@
+/*!
+\file  pqueue.c
+\brief This file implements various max-priority queues.
+
+The priority queues are generated using the GK_MKPQUEUE macro.
+
+\date   Started 3/27/2007
+\author George
+\version\verbatim $Id: pqueue.c 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
+*/
+
+#include <GKlib.h>
+
+
+/*************************************************************************/
+/*! Create the various max priority queues. key_gt is the comparison
+    that is passed to GK_MKPQUEUE; it orders the keys so that larger
+    keys have higher priority. It is only defined for the duration of
+    the instantiations below.
+    NOTE(review): the meaning of the remaining macro arguments (in
+    particular the *_MAX value) is defined by GK_MKPQUEUE, which is not
+    part of this file -- confirm against its definition. */
+/*************************************************************************/
+#define key_gt(a, b) ((a) > (b))
+GK_MKPQUEUE(gk_ipq,   gk_ipq_t,   gk_ikv_t,   int,      gk_idx_t, gk_ikvmalloc,   INT_MAX,    key_gt)
+GK_MKPQUEUE(gk_i32pq, gk_i32pq_t, gk_i32kv_t, int32_t,  gk_idx_t, gk_i32kvmalloc, INT32_MAX,  key_gt)
+GK_MKPQUEUE(gk_i64pq, gk_i64pq_t, gk_i64kv_t, int64_t,  gk_idx_t, gk_i64kvmalloc, INT64_MAX,  key_gt)
+GK_MKPQUEUE(gk_fpq,   gk_fpq_t,   gk_fkv_t,   float,    gk_idx_t, gk_fkvmalloc,   FLT_MAX,    key_gt)
+GK_MKPQUEUE(gk_dpq,   gk_dpq_t,   gk_dkv_t,   double,   gk_idx_t, gk_dkvmalloc,   DBL_MAX,    key_gt)
+GK_MKPQUEUE(gk_idxpq, gk_idxpq_t, gk_idxkv_t, gk_idx_t, gk_idx_t, gk_idxkvmalloc, GK_IDX_MAX, key_gt)
+#undef key_gt
diff --git a/random.c b/random.c
new file mode 100644
index 0000000..3698614
--- /dev/null
+++ b/random.c
@@ -0,0 +1,136 @@
+/*!
+\file  
+\brief Various routines for providing portable 32 and 64 bit random number
+       generators.
+
+\date   Started 5/17/2007
+\author George
+\version\verbatim $Id: random.c 18796 2015-06-02 11:39:45Z karypis $ \endverbatim
+*/
+
+#include <GKlib.h>
+
+
+/*************************************************************************/
+/*! Create the various random number functions. Each
+    GK_MKRANDOM(prefix, size_t, type) line instantiates the routines for
+    one element type.
+    NOTE(review): the set of generated routines is determined by
+    GK_MKRANDOM, which is not part of this file -- confirm against its
+    definition. */
+/*************************************************************************/
+GK_MKRANDOM(gk_c,   size_t, char)
+GK_MKRANDOM(gk_i,   size_t, int)
+GK_MKRANDOM(gk_i32, size_t, int32_t)
+GK_MKRANDOM(gk_f,   size_t, float)
+GK_MKRANDOM(gk_d,   size_t, double)
+GK_MKRANDOM(gk_idx, size_t, gk_idx_t)
+GK_MKRANDOM(gk_z,   size_t, ssize_t)
+GK_MKRANDOM(gk_zu,  size_t, size_t)
+
+
+
+/*************************************************************************/
+/*! GKlib's built in random number generator for portability across 
+    different architectures */
+/*************************************************************************/
+#ifdef USE_GKRAND
+/* 
+   A C-program for MT19937-64 (2004/9/29 version).
+   Coded by Takuji Nishimura and Makoto Matsumoto.
+
+   This is a 64-bit version of Mersenne Twister pseudorandom number
+   generator.
+
+   Before using, initialize the state by using init_genrand64(seed)  
+   or init_by_array64(init_key, key_length).
+
+   Copyright (C) 2004, Makoto Matsumoto and Takuji Nishimura,
+   All rights reserved.                          
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* NN is the number of 64-bit words in the state vector and MM is the
+   offset that is used when the state vector is regenerated */
+#define NN 312
+#define MM 156
+#define MATRIX_A 0xB5026F5AA96619E9ULL
+#define UM 0xFFFFFFFF80000000ULL /* Most significant 33 bits */
+#define LM 0x7FFFFFFFULL /* Least significant 31 bits */
+
+
+/* The array for the state vector */
+static uint64_t mt[NN]; 
+/* mti is the index of the next word of mt[] to be returned;
+   mti==NN+1 means that mt[] has not been initialized */
+static int mti=NN+1; 
+#endif /* USE_GKRAND */
+
+/* Seeds the random number generator. With USE_GKRAND the state vector
+   mt[] is filled from the seed; otherwise the seed is truncated to an
+   unsigned int and passed to srand(). */
+void gk_randinit(uint64_t seed)
+{
+#ifdef USE_GKRAND
+  uint64_t prev = seed;
+
+  mt[0] = prev;
+  for (mti=1; mti<NN; mti++) {
+    prev = 6364136223846793005ULL * (prev ^ (prev >> 62)) + mti;
+    mt[mti] = prev;
+  }
+#else
+  unsigned int useed = (unsigned int) seed;
+
+  srand(useed);
+#endif
+}
+
+
+/* Generates a non-negative random number that is returned as a uint64_t.
+   With USE_GKRAND the most significant bit of the generated number is
+   masked off, so the result lies in the [0, 2^63-1] interval (not in
+   [0, 2^64-1]). Without USE_GKRAND the result is assembled from two
+   calls to rand(), so its range depends on RAND_MAX. */
+uint64_t gk_randint64(void)
+{
+#ifdef USE_GKRAND
+  int i;
+  unsigned long long x;
+  static uint64_t mag01[2]={0ULL, MATRIX_A};
+
+  if (mti >= NN) { /* generate NN words at one time */
+    /* if gk_randinit() has not been called, */
+    /* a default initial seed is used     */
+    if (mti == NN+1) 
+      gk_randinit(5489ULL); 
+
+    for (i=0; i<NN-MM; i++) {
+      x = (mt[i]&UM)|(mt[i+1]&LM);
+      mt[i] = mt[i+MM] ^ (x>>1) ^ mag01[(int)(x&1ULL)];
+    }
+    for (; i<NN-1; i++) {
+      x = (mt[i]&UM)|(mt[i+1]&LM);
+      mt[i] = mt[i+(MM-NN)] ^ (x>>1) ^ mag01[(int)(x&1ULL)];
+    }
+    x = (mt[NN-1]&UM)|(mt[0]&LM);
+    mt[NN-1] = mt[MM-1] ^ (x>>1) ^ mag01[(int)(x&1ULL)];
+
+    mti = 0;
+  }
+
+  /* take the next word of the state and scramble its bits */
+  x = mt[mti++];
+
+  x ^= (x >> 29) & 0x5555555555555555ULL;
+  x ^= (x << 17) & 0x71D67FFFEDA60000ULL;
+  x ^= (x << 37) & 0xFFF7EEE000000000ULL;
+  x ^= (x >> 43);
+
+  /* clear the most significant bit */
+  return x & 0x7FFFFFFFFFFFFFFF;
+#else
+  /* the first rand() value supplies the high 32 bits and the second one
+     the low bits */
+  return (uint64_t)(((uint64_t) rand()) << 32 | ((uint64_t) rand()));
+#endif
+}
+
+/* Generates a non-negative random number that is returned as a uint32_t.
+   With USE_GKRAND it is the low 31 bits of gk_randint64(), i.e., a
+   number in the [0, 2^31-1] interval; otherwise it is the value
+   returned by rand(). */
+uint32_t gk_randint32(void)
+{
+  uint32_t r;
+
+#ifdef USE_GKRAND
+  r = (uint32_t)(gk_randint64() & 0x7FFFFFFF);
+#else
+  r = (uint32_t)rand();
+#endif
+
+  return r;
+}
+
+
diff --git a/rw.c b/rw.c
new file mode 100644
index 0000000..7cd4391
--- /dev/null
+++ b/rw.c
@@ -0,0 +1,103 @@
+/*!
+ * \file 
+ *
+ * \brief Various routines that perform random-walk based operations
+          on graphs stored as gk_csr_t matrices.
+ *
+ * \author George Karypis
+ * \version\verbatim $Id: rw.c 11078 2011-11-12 00:20:44Z karypis $ \endverbatim
+ */
+
+#include <GKlib.h>
+
+
+/*************************************************************************/
+/*! Computes the (personalized) page-rank of the vertices in a graph.
+
+  \param mat is the matrix storing the graph. Its row structure (rowptr, 
+         rowind, rowval) is used, and the edge weights in rowval are 
+         normalized per row to obtain transition probabilities.
+  \param lamda is the weight that is given to the random-walk term; 
+         the restart distribution is weighted by (1-lamda).
+  \param eps is the error tolerance for convergence. The iterations stop
+         once the largest absolute change of any score is below eps.
+  \param max_niter is the maximum number of allowed iterations.
+  \param pr on entry stores the restart distribution of the vertices. 
+         This allows for the computation of personalized page-rank scores 
+         by appropriately setting that parameter. 
+         On return, pr stores the computed page ranks.
+ 
+  \returns the number of iterations that were performed.
+*/
+/**************************************************************************/
+int gk_rw_PageRank(gk_csr_t *mat, float lamda, float eps, int max_niter, float *pr)
+{
+  ssize_t i, j, iter, nrows;
+  double *rscale, *prold, *prnew, *prtmp;
+  double fromsinks, error, diff;
+  ssize_t *rowptr;
+  int *rowind;
+  float *rowval;
+
+  nrows  = mat->nrows;
+  rowptr = mat->rowptr;
+  rowind = mat->rowind;
+  rowval = mat->rowval;
+
+  prold  = gk_dsmalloc(nrows, 0, "gk_rw_PageRank: prold");
+  prnew  = gk_dsmalloc(nrows, 0, "gk_rw_PageRank: prnew");
+  rscale = gk_dsmalloc(nrows, 0, "gk_rw_PageRank: rscale");
+
+  /* compute the scaling factors to get adjacency weights into transition 
+     probabilities; rows without (positive) weight keep a factor of 0 and
+     are treated as sinks */
+  for (i=0; i<nrows; i++) {
+    for (j=rowptr[i]; j<rowptr[i+1]; j++)
+      rscale[i] += rowval[j];
+    if (rscale[i] > 0)
+      rscale[i] = 1.0/rscale[i];
+  }
+
+  /* the restart distribution is the initial pr scores */
+  for (i=0; i<nrows; i++)
+    prnew[i] = pr[i];
+
+  /* get into the PR iteration */
+  for (iter=0; iter<max_niter; iter++) {
+    gk_SWAP(prnew, prold, prtmp);
+    gk_dset(nrows, 0.0, prnew);
+
+    /* determine the total current PR score of the sinks so that you 
+       can distribute them to all nodes according to the restart 
+       distribution. */
+    for (fromsinks=0.0, i=0; i<nrows; i++) {
+      if (rscale[i] == 0) 
+        fromsinks += prold[i];
+    }
+
+    /* push random-walk scores to the outlinks */
+    for (i=0; i<nrows; i++) {
+      for (j=rowptr[i]; j<rowptr[i+1]; j++)
+        prnew[rowind[j]] += prold[i]*rscale[i]*rowval[j];
+    }
+
+    /* apply the restart conditions */
+    for (i=0; i<nrows; i++) {
+      prnew[i] = lamda*(fromsinks*pr[i]+prnew[i]) + (1.0-lamda)*pr[i];
+    }
+
+    /* compute the error as the largest absolute change of any score */
+    for (error=0.0, i=0; i<nrows; i++) {
+      diff = fabs(prnew[i]-prold[i]);
+      if (diff > error)
+        error = diff;
+    }
+
+    if (error < eps)
+      break;
+  }
+
+  /* store the computed pr scores into pr for output */
+  for (i=0; i<nrows; i++)
+    pr[i] = prnew[i];
+
+  gk_free((void **)&prnew, &prold, &rscale, LTERM);
+  
+  /* If the loop was left via the break, iter is the 0-based index of the 
+     last iteration and iter+1 iterations were performed. If the loop ran 
+     to completion, iter already equals the number of iterations performed 
+     (0 when max_niter <= 0). */
+  return (int)(iter < max_niter ? iter+1 : iter);
+
+}
+
diff --git a/scripts/gexpand.pl b/scripts/gexpand.pl
new file mode 100644
index 0000000..2b82134
--- /dev/null
+++ b/scripts/gexpand.pl
@@ -0,0 +1,53 @@
+#!/usr/bin/perl -w
+
+# Expands a graph into <ncopies> stacked layers and prints the resulting
+# edge list to stdout, one "src dst" pair of 0-based vertex ids per line.
+#
+# Input format: the first line is "<nvtxs> <nedges>"; line u+1 lists the
+# (1-based) neighbors of vertex u. Copy i of vertex u gets the id
+# i*nvtxs + u - 1. Every edge is printed in both directions.
+
+die "Usage $0 <gfile> <ncopies>\n" unless @ARGV == 2;
+
+$filein  = shift(@ARGV);
+$ncopies = shift(@ARGV);
+
+# three-argument open, so that the file name is never parsed for a mode
+open(my $fpin, '<', $filein) or die "Could not open $filein. $!\n";
+
+# only the number of vertices is needed from the header line
+$_ = <$fpin>;
+chomp($_);
+($nvtxs) = split(' ', $_);
+
+$u = 1;
+while (<$fpin>) {
+  chomp($_);
+  @edges = split(' ', $_);
+
+  # put the within layer edges (each undirected edge is handled once,
+  # from its lower-numbered endpoint)
+  foreach $v (@edges) {
+    next if $v < $u;
+    for ($i=0; $i<$ncopies; $i++) {
+      printf("%d %d\n", $i*$nvtxs+$u-1, $i*$nvtxs+$v-1);
+      printf("%d %d\n", $i*$nvtxs+$v-1, $i*$nvtxs+$u-1);
+    }
+  }
+
+  # put the vertex across layer edges (u in layer i <-> u in layer i+1)
+  for ($i=0; $i<$ncopies-1; $i++) {
+    printf("%d %d\n", $i*$nvtxs+$u-1, ($i+1)*$nvtxs+$u-1);
+    printf("%d %d\n", ($i+1)*$nvtxs+$u-1, $i*$nvtxs+$u-1);
+  }
+
+  # put the adjacent across layer edges (u in layer i <-> v in layer i+1),
+  # keeping only every other neighbor, with the parity alternating by layer
+  for ($i=0; $i<$ncopies-1; $i++) {
+    $j=0;
+    foreach $v (@edges) {
+      $j++;
+      next if (($j+$i)%2 == 0);
+      printf("%d %d\n", $i*$nvtxs+$u-1, ($i+1)*$nvtxs+$v-1);
+      printf("%d %d\n", ($i+1)*$nvtxs+$v-1, $i*$nvtxs+$u-1);
+    }
+  }
+
+  $u++;
+}
+
+close($fpin);
diff --git a/seq.c b/seq.c
new file mode 100644
index 0000000..f267a3e
--- /dev/null
+++ b/seq.c
@@ -0,0 +1,174 @@
+/*
+ *
+ * Sequence handler library by Huzefa Rangwala
+ * Date : 03.01.2007
+ *
+ *
+ *
+ */
+
+
+#include <GKlib.h>
+
+
+
+
+/*********************************************************/
+/*! \brief Initializes the <tt>gk_seq_t</tt> variable
+
+Sets the length and symbol count to zero and all the pointer fields
+assigned below to NULL, so that the structure is in a known state.
+
+\param seq is a pointer to the gk_seq_t to be initialized.
+\returns nothing.
+*/
+/***********************************************************************/
+
+void gk_seq_init(gk_seq_t *seq)
+{
+    
+    seq->len = 0;
+    /* nsymbols is read by gk_seq_free(), so it must not be left undefined */
+    seq->nsymbols = 0;
+    seq->sequence = NULL;
+        
+    seq->pssm = NULL;
+    seq->psfm = NULL;
+    
+    seq->name = NULL;
+    
+}
+
+/***********************************************************************/
+/*! \brief Builds the index<->character lookup tables for an alphabet
+
+Allocates a gk_i2cc2i_t with two 256-entry tables. Both are first filled
+with -1 via gk_cset()/gk_iset(); then <tt>i2c[i]</tt> is set to the i-th
+symbol of \c alphabet and <tt>c2i[c]</tt> to the position of character
+\c c in \c alphabet. The code assumes that \c alphabet has at most 256 
+symbols, as that is the size of the \c i2c table.
+
+\param    alphabet is the string of symbols, i.e., amino acids, nucleotides.
+\returns  a pointer to the newly allocated gk_i2cc2i_t variable.
+*/
+/*********************************************************************/
+
+gk_i2cc2i_t *gk_i2cc2i_create_common(char *alphabet)
+{
+    int nsymbols;
+    gk_idx_t i;
+    gk_i2cc2i_t *t;
+
+    nsymbols = strlen(alphabet);
+    t        = gk_malloc(sizeof(gk_i2cc2i_t),"gk_i2c_create_common");
+    t->n     = nsymbols;
+    t->i2c   = gk_cmalloc(256, "gk_i2c_create_common");
+    t->c2i   = gk_imalloc(256, "gk_i2c_create_common");
+
+    gk_cset(256, -1, t->i2c);
+    gk_iset(256, -1, t->c2i);
+
+    for(i=0;i<nsymbols;i++){
+        t->i2c[i] = alphabet[i];
+        /* Index through unsigned char: a plain char may be negative for
+           characters with the high bit set, which would write before the
+           start of the c2i table. */
+        t->c2i[(unsigned char)alphabet[i]] = i;
+    }
+
+    return t;
+
+}
+
+
+/*********************************************************************/
+/*! \brief This function reads a pssm in the format of gkmod pssm
+
+The first line of the file is a header whose first 20 tokens name the 
+columns (only the first character of each token is used). Every following 
+line is tokenized on blanks/tabs; token 1 supplies the residue, tokens 
+2..21 the pssm values and tokens 22..41 the psfm values. Lines are 
+upper-cased before they are parsed.
+
+\param filename is the name of the pssm file
+\returns a newly allocated gk_seq_t holding the sequence, pssm and psfm.
+         The function calls errexit() if the file is empty, ends 
+         prematurely, or a line has fewer tokens than required.
+*/
+/********************************************************************/
+gk_seq_t *gk_seq_ReadGKMODPSSM(char *filename)
+{
+    gk_seq_t *seq;
+    gk_idx_t i, j, ii;
+    size_t ntokens, nbytes, len;
+    FILE *fpin;
+    
+    gk_Tokens_t tokens;
+    static char *AAORDER = "ARNDCQEGHILKMFPSTWYVBZX*";
+    static int PSSMWIDTH = 20;
+    char *header, line[MAXLINELEN];
+    gk_i2cc2i_t *converter;
+
+    gk_getfilestats(filename, &len, &ntokens, NULL, &nbytes);
+    /* len is unsigned; without this check the decrement below would wrap */
+    if (len == 0)
+      errexit("Empty file: %s\n", filename);
+    len --;  /* the header line is not part of the sequence */
+
+    header = gk_cmalloc(PSSMWIDTH, "gk_seq_ReadGKMODPSSM: header");
+    
+    converter = gk_i2cc2i_create_common(AAORDER);
+
+    seq = gk_malloc(sizeof(gk_seq_t),"gk_seq_ReadGKMODPSSM");
+    gk_seq_init(seq);
+    
+    seq->len = len;
+    seq->sequence = gk_imalloc(len, "gk_seq_ReadGKMODPSSM");
+    seq->pssm     = gk_iAllocMatrix(len, PSSMWIDTH, 0, "gk_seq_ReadGKMODPSSM");
+    seq->psfm     = gk_iAllocMatrix(len, PSSMWIDTH, 0, "gk_seq_ReadGKMODPSSM");
+    
+    seq->nsymbols = PSSMWIDTH;
+    seq->name     = gk_getbasename(filename);
+    
+    fpin = gk_fopen(filename,"r","gk_seq_ReadGKMODPSSM");
+
+
+    /* Read the header line */
+    if (fgets(line, MAXLINELEN-1, fpin) == NULL)
+      errexit("Unexpected end of file: %s\n", filename);
+    gk_strtoupper(line);
+    gk_strtokenize(line, " \t\n", &tokens);
+
+    /* NOTE(review): relies on gk_Tokens_t exposing the token count as ntoks */
+    if (tokens.ntoks < PSSMWIDTH)
+      errexit("Header line has fewer than %d columns: %s\n", PSSMWIDTH, filename);
+
+    for (i=0; i<PSSMWIDTH; i++)
+        header[i] = tokens.list[i][0];
+    
+    gk_freetokenslist(&tokens);
+    
+
+    /* Read the rest of the lines */
+    for (i=0, ii=0; ii<len; ii++) {
+        if (fgets(line, MAXLINELEN-1, fpin) == NULL)
+          errexit("Unexpected end of file: %s\n", filename);
+        gk_strtoupper(line);
+        gk_strtokenize(line, " \t\n", &tokens);
+
+        /* tokens 0/1 plus PSSMWIDTH pssm and PSSMWIDTH psfm values */
+        if (tokens.ntoks < 2+2*PSSMWIDTH)
+          errexit("Line with fewer than %d columns in file: %s\n", 2+2*PSSMWIDTH, filename);
+        
+        /* index c2i through unsigned char so the index is never negative */
+        seq->sequence[i] = converter->c2i[(unsigned char)tokens.list[1][0]];
+        
+        for (j=0; j<PSSMWIDTH; j++) {
+            seq->pssm[i][converter->c2i[(unsigned char)header[j]]] = atoi(tokens.list[2+j]);
+            seq->psfm[i][converter->c2i[(unsigned char)header[j]]] = atoi(tokens.list[2+PSSMWIDTH+j]);
+        }
+        
+        gk_freetokenslist(&tokens);
+        i++;
+    }
+    
+    seq->len = i; /* Reset the length if certain characters were skipped */
+    
+    gk_free((void **)&header, LTERM);
+
+    /* the converter is only needed while parsing; release its two tables
+       and then the structure itself (previously all three were leaked) */
+    gk_free((void **)&converter->i2c, &converter->c2i, LTERM);
+    gk_free((void **)&converter, LTERM);
+
+    gk_fclose(fpin);
+
+    return seq;
+}
+
+
+/**************************************************************************/
+/*! \brief Releases a gk_seq_t and the arrays it points to.
+
+Frees the pssm and psfm matrices, the name and sequence arrays, and 
+finally the structure itself.
+ 
+\param   seq is the gk_seq_t to be freed.
+\returns nothing
+*/
+/**************************************************************************/
+void gk_seq_free(gk_seq_t *seq)
+{
+    /* the two profile matrices */
+    gk_iFreeMatrix(&seq->pssm, seq->len, seq->nsymbols);
+    gk_iFreeMatrix(&seq->psfm, seq->len, seq->nsymbols);
+
+    /* the per-sequence arrays */
+    gk_free((void **)&seq->name, LTERM);
+    gk_free((void **)&seq->sequence, LTERM);
+
+    /* and the container itself */
+    gk_free((void **)&seq, LTERM);
+}
diff --git a/sort.c b/sort.c
new file mode 100644
index 0000000..f0144ae
--- /dev/null
+++ b/sort.c
@@ -0,0 +1,437 @@
+/*!
+\file  sort.c
+\brief This file contains GKlib's various sorting routines
+
+These routines are implemented using the GKSORT macro that is defined
+in gk_qsort.h and is based on GNU's GLIBC qsort() implementation.
+
+Additional sorting routines can be created in the same way that
+these routines were defined.
+
+\date   Started 4/4/07
+\author George
+\version\verbatim $Id: sort.c 21050 2017-05-25 03:53:58Z karypis $ \endverbatim
+*/
+
+#include <GKlib.h>
+
+
+
+/*************************************************************************/
+/*! Sorts the n-element char array base in increasing order */
+/*************************************************************************/
+void gk_csorti(size_t n, char *base)
+{
+#define c_asc(x, y) (*(x) < *(y))
+  GK_MKQSORT(char, base, n, c_asc);
+#undef c_asc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n-element char array base in decreasing order */
+/*************************************************************************/
+void gk_csortd(size_t n, char *base)
+{
+#define c_desc(x, y) (*(x) > *(y))
+  GK_MKQSORT(char, base, n, c_desc);
+#undef c_desc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n-element int array base in increasing order */
+/*************************************************************************/
+void gk_isorti(size_t n, int *base)
+{
+#define i_asc(x, y) (*(x) < *(y))
+  GK_MKQSORT(int, base, n, i_asc);
+#undef i_asc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n-element int array base in decreasing order */
+/*************************************************************************/
+void gk_isortd(size_t n, int *base)
+{
+#define i_desc(x, y) (*(x) > *(y))
+  GK_MKQSORT(int, base, n, i_desc);
+#undef i_desc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n-element int32_t array base in increasing order */
+/*************************************************************************/
+void gk_i32sorti(size_t n, int32_t *base)
+{
+#define i32_asc(x, y) (*(x) < *(y))
+  GK_MKQSORT(int32_t, base, n, i32_asc);
+#undef i32_asc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n-element int32_t array base in decreasing order */
+/*************************************************************************/
+void gk_i32sortd(size_t n, int32_t *base)
+{
+#define i32_desc(x, y) (*(x) > *(y))
+  GK_MKQSORT(int32_t, base, n, i32_desc);
+#undef i32_desc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n-element int64_t array base in increasing order */
+/*************************************************************************/
+void gk_i64sorti(size_t n, int64_t *base)
+{
+#define i64_asc(x, y) (*(x) < *(y))
+  GK_MKQSORT(int64_t, base, n, i64_asc);
+#undef i64_asc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n-element uint32_t array base in increasing order */
+/*************************************************************************/
+void gk_ui32sorti(size_t n, uint32_t *base)
+{
+#define ui32_asc(x, y) (*(x) < *(y))
+  GK_MKQSORT(uint32_t, base, n, ui32_asc);
+#undef ui32_asc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n-element uint32_t array base in decreasing order */
+/*************************************************************************/
+void gk_ui32sortd(size_t n, uint32_t *base)
+{
+#define ui32_desc(x, y) (*(x) > *(y))
+  GK_MKQSORT(uint32_t, base, n, ui32_desc);
+#undef ui32_desc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n-element uint64_t array base in increasing order */
+/*************************************************************************/
+void gk_ui64sorti(size_t n, uint64_t *base)
+{
+#define ui64_asc(x, y) (*(x) < *(y))
+  GK_MKQSORT(uint64_t, base, n, ui64_asc);
+#undef ui64_asc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n-element uint64_t array base in decreasing order */
+/*************************************************************************/
+void gk_ui64sortd(size_t n, uint64_t *base)
+{
+#define ui64_desc(x, y) (*(x) > *(y))
+  GK_MKQSORT(uint64_t, base, n, ui64_desc);
+#undef ui64_desc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n-element int64_t array base in decreasing order */
+/*************************************************************************/
+void gk_i64sortd(size_t n, int64_t *base)
+{
+#define i64_desc(x, y) (*(x) > *(y))
+  GK_MKQSORT(int64_t, base, n, i64_desc);
+#undef i64_desc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n-element float array base in increasing order */
+/*************************************************************************/
+void gk_fsorti(size_t n, float *base)
+{
+#define f_asc(x, y) (*(x) < *(y))
+  GK_MKQSORT(float, base, n, f_asc);
+#undef f_asc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n-element float array base in decreasing order */
+/*************************************************************************/
+void gk_fsortd(size_t n, float *base)
+{
+#define f_desc(x, y) (*(x) > *(y))
+  GK_MKQSORT(float, base, n, f_desc);
+#undef f_desc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n-element double array base in increasing order */
+/*************************************************************************/
+void gk_dsorti(size_t n, double *base)
+{
+#define d_asc(x, y) (*(x) < *(y))
+  GK_MKQSORT(double, base, n, d_asc);
+#undef d_asc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n-element double array base in decreasing order */
+/*************************************************************************/
+void gk_dsortd(size_t n, double *base)
+{
+#define d_desc(x, y) (*(x) > *(y))
+  GK_MKQSORT(double, base, n, d_desc);
+#undef d_desc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n-element gk_idx_t array base in increasing order */
+/*************************************************************************/
+void gk_idxsorti(size_t n, gk_idx_t *base)
+{
+#define idx_asc(x, y) (*(x) < *(y))
+  GK_MKQSORT(gk_idx_t, base, n, idx_asc);
+#undef idx_asc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n-element gk_idx_t array base in decreasing order */
+/*************************************************************************/
+void gk_idxsortd(size_t n, gk_idx_t *base)
+{
+#define idx_desc(x, y) (*(x) > *(y))
+  GK_MKQSORT(gk_idx_t, base, n, idx_desc);
+#undef idx_desc
+}
+
+
+
+
+/*************************************************************************/
+/*! Sorts the n-element gk_ckv_t array base in increasing order of key */
+/*************************************************************************/
+void gk_ckvsorti(size_t n, gk_ckv_t *base)
+{
+#define ckv_asc(x, y) ((x)->key < (y)->key)
+  GK_MKQSORT(gk_ckv_t, base, n, ckv_asc);
+#undef ckv_asc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n-element gk_ckv_t array base in decreasing order of key */
+/*************************************************************************/
+void gk_ckvsortd(size_t n, gk_ckv_t *base)
+{
+#define ckv_desc(x, y) ((x)->key > (y)->key)
+  GK_MKQSORT(gk_ckv_t, base, n, ckv_desc);
+#undef ckv_desc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n-element gk_ikv_t array base in increasing order of key */
+/*************************************************************************/
+void gk_ikvsorti(size_t n, gk_ikv_t *base)
+{
+#define ikv_asc(x, y) ((x)->key < (y)->key)
+  GK_MKQSORT(gk_ikv_t, base, n, ikv_asc);
+#undef ikv_asc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n-element gk_ikv_t array base in decreasing order of key */
+/*************************************************************************/
+void gk_ikvsortd(size_t n, gk_ikv_t *base)
+{
+#define ikv_desc(x, y) ((x)->key > (y)->key)
+  GK_MKQSORT(gk_ikv_t, base, n, ikv_desc);
+#undef ikv_desc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n-element gk_i32kv_t array base in increasing order of key */
+/*************************************************************************/
+void gk_i32kvsorti(size_t n, gk_i32kv_t *base)
+{
+#define i32kv_asc(x, y) ((x)->key < (y)->key)
+  GK_MKQSORT(gk_i32kv_t, base, n, i32kv_asc);
+#undef i32kv_asc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n-element gk_i32kv_t array base in decreasing order of key */
+/*************************************************************************/
+void gk_i32kvsortd(size_t n, gk_i32kv_t *base)
+{
+#define i32kv_desc(x, y) ((x)->key > (y)->key)
+  GK_MKQSORT(gk_i32kv_t, base, n, i32kv_desc);
+#undef i32kv_desc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n-element gk_i64kv_t array base in increasing order of key */
+/*************************************************************************/
+void gk_i64kvsorti(size_t n, gk_i64kv_t *base)
+{
+#define i64kv_asc(x, y) ((x)->key < (y)->key)
+  GK_MKQSORT(gk_i64kv_t, base, n, i64kv_asc);
+#undef i64kv_asc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n-element gk_i64kv_t array base in decreasing order of key */
+/*************************************************************************/
+void gk_i64kvsortd(size_t n, gk_i64kv_t *base)
+{
+#define i64kv_desc(x, y) ((x)->key > (y)->key)
+  GK_MKQSORT(gk_i64kv_t, base, n, i64kv_desc);
+#undef i64kv_desc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n-element gk_zkv_t array base in increasing order of key */
+/*************************************************************************/
+void gk_zkvsorti(size_t n, gk_zkv_t *base)
+{
+#define zkv_asc(x, y) ((x)->key < (y)->key)
+  GK_MKQSORT(gk_zkv_t, base, n, zkv_asc);
+#undef zkv_asc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n-element gk_zkv_t array base in decreasing order of key */
+/*************************************************************************/
+void gk_zkvsortd(size_t n, gk_zkv_t *base)
+{
+#define zkv_desc(x, y) ((x)->key > (y)->key)
+  GK_MKQSORT(gk_zkv_t, base, n, zkv_desc);
+#undef zkv_desc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n-element gk_zukv_t array base in increasing order of key */
+/*************************************************************************/
+void gk_zukvsorti(size_t n, gk_zukv_t *base)
+{
+#define zukv_asc(x, y) ((x)->key < (y)->key)
+  GK_MKQSORT(gk_zukv_t, base, n, zukv_asc);
+#undef zukv_asc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n-element gk_zukv_t array base in decreasing order of key */
+/*************************************************************************/
+void gk_zukvsortd(size_t n, gk_zukv_t *base)
+{
+#define zukv_desc(x, y) ((x)->key > (y)->key)
+  GK_MKQSORT(gk_zukv_t, base, n, zukv_desc);
+#undef zukv_desc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n-element gk_fkv_t array base in increasing order of key */
+/*************************************************************************/
+void gk_fkvsorti(size_t n, gk_fkv_t *base)
+{
+#define fkv_asc(x, y) ((x)->key < (y)->key)
+  GK_MKQSORT(gk_fkv_t, base, n, fkv_asc);
+#undef fkv_asc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n-element gk_fkv_t array base in decreasing order of key */
+/*************************************************************************/
+void gk_fkvsortd(size_t n, gk_fkv_t *base)
+{
+#define fkv_desc(x, y) ((x)->key > (y)->key)
+  GK_MKQSORT(gk_fkv_t, base, n, fkv_desc);
+#undef fkv_desc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n-element gk_dkv_t array base in increasing order of key */
+/*************************************************************************/
+void gk_dkvsorti(size_t n, gk_dkv_t *base)
+{
+#define dkv_asc(x, y) ((x)->key < (y)->key)
+  GK_MKQSORT(gk_dkv_t, base, n, dkv_asc);
+#undef dkv_asc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n-element gk_dkv_t array base in decreasing order of key */
+/*************************************************************************/
+void gk_dkvsortd(size_t n, gk_dkv_t *base)
+{
+#define dkv_desc(x, y) ((x)->key > (y)->key)
+  GK_MKQSORT(gk_dkv_t, base, n, dkv_desc);
+#undef dkv_desc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n-element gk_skv_t array base in increasing strcmp() order 
+    of the keys */
+/*************************************************************************/
+void gk_skvsorti(size_t n, gk_skv_t *base)
+{
+#define skv_asc(x, y) (strcmp((x)->key, (y)->key) < 0)
+  GK_MKQSORT(gk_skv_t, base, n, skv_asc);
+#undef skv_asc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n-element gk_skv_t array base in decreasing strcmp() order 
+    of the keys */
+/*************************************************************************/
+void gk_skvsortd(size_t n, gk_skv_t *base)
+{
+#define skv_desc(x, y) (strcmp((x)->key, (y)->key) > 0)
+  GK_MKQSORT(gk_skv_t, base, n, skv_desc);
+#undef skv_desc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n-element gk_idxkv_t array base in increasing order of key */
+/*************************************************************************/
+void gk_idxkvsorti(size_t n, gk_idxkv_t *base)
+{
+#define idxkv_asc(x, y) ((x)->key < (y)->key)
+  GK_MKQSORT(gk_idxkv_t, base, n, idxkv_asc);
+#undef idxkv_asc
+}
+
+
+/*************************************************************************/
+/*! Sorts the n-element gk_idxkv_t array base in decreasing order of key */
+/*************************************************************************/
+void gk_idxkvsortd(size_t n, gk_idxkv_t *base)
+{
+#define idxkv_desc(x, y) ((x)->key > (y)->key)
+  GK_MKQSORT(gk_idxkv_t, base, n, idxkv_desc);
+#undef idxkv_desc
+}
diff --git a/string.c b/string.c
new file mode 100644
index 0000000..4a3fb14
--- /dev/null
+++ b/string.c
@@ -0,0 +1,530 @@
+/************************************************************************/
+/*! \file 
+
+\brief Functions for manipulating strings.
+
+Various functions for manipulating strings. Some of these functions 
+provide new functionality, whereas others are drop-in replacements
+of standard functions (but with enhanced functionality).
+
+\date Started 11/1/99
+\author George
+\version $Id: string.c 14330 2013-05-18 12:15:15Z karypis $
+*/
+/************************************************************************/
+
+/* the following is for strptime() */
+#define _XOPEN_SOURCE
+#include <time.h>
+#undef _XOPEN_SOURCE
+
+#include <GKlib.h>
+
+
+
+/************************************************************************/
+/*! \brief Replaces certain characters in a string.
+ 
+This function takes a string and replaces all the characters in the
+\c fromlist with the corresponding characters from the \c tolist. 
+That is, each occurence of <tt>fromlist[i]</tt> is replaced by 
+<tt>tolist[i]</tt>. 
+If the \c tolist is shorter than \c fromlist, then the corresponding 
+characters are deleted. The modifications on \c str are done in place. 
+It tries to provide a functionality similar to Perl's \b tr// function.
+
+\param str is the string whose characters will be replaced.
+\param fromlist is the set of characters to be replaced.
+\param tolist is the set of replacement characters .
+\returns A pointer to \c str itself.
+*/
+/************************************************************************/
+char *gk_strchr_replace(char *str, char *fromlist, char *tolist)
+{
+  char *rd, *wr, *hit;
+  size_t ntargets = strlen(tolist);
+
+  /* rd scans the original characters; wr trails it and marks where the
+     next surviving character is stored */
+  for (rd=wr=str; *rd!='\0'; rd++) {
+    hit = strchr(fromlist, *rd);
+
+    if (hit == NULL)                              /* not listed: keep as is */
+      *wr++ = *rd;
+    else if ((size_t)(hit-fromlist) < ntargets)   /* listed, has a replacement */
+      *wr++ = tolist[hit-fromlist];
+    /* listed but beyond the end of tolist: the character is dropped */
+  }
+  *wr = '\0';
+
+  return str;
+}
+
+
+
+/************************************************************************/
+/*! \brief Regex-based search-and-replace function
+ 
+This function is a C implementation of Perl's <tt> s//</tt> regular-expression
+based substitution function.
+
+\param str 
+  is the input string on which the operation will be performed.
+\param pattern
+  is the regular expression for the pattern to be matched for substitution.
+\param replacement
+  is the replacement string, in which the possible captured pattern substrings
+  are referred to as $1, $2, ..., $9. The entire matched pattern is referred
+  to as $0.
+\param options
+  is a string specified options for the substitution operation. Currently the
+  <tt>"i"</tt> (case insensitive) and <tt>"g"</tt> (global substitution) are 
+  supported.
+\param new_str 
+  is a reference to a pointer that will store a pointer to the newly created 
+  string that results from the substitutions. This string is allocated via 
+  gk_malloc() and needs to be freed using gk_free(). The string is returned 
+  even if no substitutions were performed.
+\returns
+  If successful, it returns 1 + the number of substitutions that were performed.
+  Thus, if no substitutions were performed, the returned value will be 1.
+  Otherwise it returns 0. In case of error, a meaningful error message is 
+  returned in <tt>new_str</tt>, which also needs to be freed afterwards.
+*/
+/************************************************************************/
+/* Makes sure that at least `need` more characters (plus the terminating
+   '\0') fit in *new_str past position noffset, growing the buffer and 
+   updating *nlen if they do not. */
+static void gk_strstr_replace_reserve(char **new_str, ssize_t *nlen, 
+                ssize_t noffset, ssize_t need)
+{
+  if (*nlen-noffset < need) {
+    *nlen += *nlen + need;
+    *new_str = (char *)gk_realloc(*new_str, (*nlen+1)*sizeof(char), "gk_strstr_replace: new_str");
+  }
+}
+
+int gk_strstr_replace(char *str, char *pattern, char *replacement, char *options,
+      char **new_str)
+{
+  ssize_t i, len, rlen, nlen, offset, noffset, sublen;
+  int j, rc, flags, global, nmatches;
+  regex_t re;
+  regmatch_t matches[10];
+
+  
+  /* Parse the options */
+  flags = REG_EXTENDED;
+  if (strchr(options, 'i') != NULL)
+    flags = flags | REG_ICASE;
+  global = (strchr(options, 'g') != NULL ? 1 : 0);
+
+
+  /* Compile the regex; on failure the regerror() text is returned */
+  if ((rc = regcomp(&re, pattern, flags)) != 0) { 
+    len = regerror(rc, &re, NULL, 0);
+    *new_str = gk_cmalloc(len, "gk_strstr_replace: new_str");
+    regerror(rc, &re, *new_str, len);
+    return 0;
+  }
+
+  /* Prepare the output string */
+  len = strlen(str);
+  nlen = 2*len;
+  noffset = 0;
+  *new_str = gk_cmalloc(nlen+1, "gk_strstr_replace: new_str");
+
+
+  /* Get into the matching-replacing loop */
+  rlen = strlen(replacement);
+  offset = 0;
+  nmatches = 0;
+  do {
+    rc = regexec(&re, str+offset, 10, matches, 0);
+
+    if (rc == REG_ESPACE) {
+      gk_free((void **)new_str, LTERM);
+      *new_str = gk_strdup("regexec ran out of memory.");
+      regfree(&re);
+      return 0;
+    }
+    else if (rc == REG_NOMATCH) {
+      /* No more matches: copy whatever is left of the input */
+      gk_strstr_replace_reserve(new_str, &nlen, noffset, len-offset);
+      strcpy(*new_str+noffset, str+offset);
+      noffset += (len-offset);
+      break;
+    }
+    else { /* A match was found! */
+      nmatches++;
+
+      /* Copy the left unmatched portion of the string */
+      if (matches[0].rm_so > 0) {
+        gk_strstr_replace_reserve(new_str, &nlen, noffset, matches[0].rm_so);
+        strncpy(*new_str+noffset, str+offset, matches[0].rm_so);
+        noffset += matches[0].rm_so;
+      }
+
+      /* Go and append the replacement string */
+      for (i=0; i<rlen; i++) {
+        switch (replacement[i]) {
+          case '\\':
+            if (i+1 < rlen) {
+              gk_strstr_replace_reserve(new_str, &nlen, noffset, 1);
+              /* the parentheses matter: [] binds tighter than unary * */
+              (*new_str)[noffset++] = replacement[++i];
+            }
+            else {
+              gk_free((void **)new_str, LTERM);
+              *new_str = gk_strdup("Error in replacement string. Missing character following '\\'.");
+              regfree(&re);
+              return 0;
+            }
+            break;
+
+          case '$':
+            if (i+1 < rlen) {
+              j = (int)(replacement[++i] - '0');
+              if (j < 0 || j > 9) {
+                gk_free((void **)new_str, LTERM);
+                *new_str = gk_strdup("Error in captured subexpression specification.");
+                regfree(&re);
+                return 0;
+              }
+
+              /* regexec() reports subexpressions that did not take part in
+                 the match with rm_so == -1; these expand to nothing */
+              if (matches[j].rm_so >= 0) {
+                sublen = matches[j].rm_eo-matches[j].rm_so;
+                gk_strstr_replace_reserve(new_str, &nlen, noffset, sublen);
+
+                /* copy exactly the captured span, not rm_eo characters */
+                strncpy(*new_str+noffset, str+offset+matches[j].rm_so, sublen);
+                noffset += sublen;
+              }
+            }
+            else {
+              gk_free((void **)new_str, LTERM);
+              *new_str = gk_strdup("Error in replacement string. Missing subexpression number following '$'.");
+              regfree(&re);
+              return 0;
+            }
+            break;
+
+          default:
+            gk_strstr_replace_reserve(new_str, &nlen, noffset, 1);
+            (*new_str)[noffset++] = replacement[i];
+        }
+      }
+
+      /* Update the offset of str for the next match */
+      offset += matches[0].rm_eo;
+
+      if (!global) {
+        /* Copy the right portion of the string if no 'g' option */
+        gk_strstr_replace_reserve(new_str, &nlen, noffset, len-offset);
+        strcpy(*new_str+noffset, str+offset);
+        noffset += (len-offset);
+      }
+      else if (matches[0].rm_eo == matches[0].rm_so) {
+        /* An empty match does not consume any input, so the next regexec()
+           would match at the same place forever. Step over one character 
+           (copying it to the output) or stop if the input is exhausted. */
+        if (offset >= len)
+          break;
+        gk_strstr_replace_reserve(new_str, &nlen, noffset, 1);
+        (*new_str)[noffset++] = str[offset++];
+      }
+    }
+  } while (global);
+
+  (*new_str)[noffset] = '\0';
+
+  regfree(&re);
+  return nmatches + 1;
+
+}
+
+
+
+/************************************************************************/
+/*! \brief Prunes characters from the end of the string.
+
+This function removes any trailing characters that are included in the
+\c rmlist. The trimming stops at the last character (i.e., first character 
+from the end) that is not in \c rmlist.  
+This function can be used to remove trailing spaces, newlines, etc.
+This is a destructive operation as it modifies the string.
+
+\param str is the string that will be trimmed.
+\param rmlist contains the set of characters that will be removed.
+\returns A pointer to \c str itself.
+\sa gk_strhprune()
+*/
+/*************************************************************************/
+char *gk_strtprune(char *str, char *rmlist)
+{
+  size_t keep = strlen(str);   /* number of leading characters to retain */
+
+  /* walk backwards while the last retained character is in rmlist */
+  while (keep > 0 && strchr(rmlist, str[keep-1]) != NULL)
+    keep--;
+
+  str[keep] = '\0';
+
+  return str;
+}
+
+
+/************************************************************************/
+/*! \brief Prunes characters from the beginning of the string.
+
+This function removes any starting characters that are included in the
+\c rmlist. The trimming stops at the first character that is not in 
+\c rmlist.
+This function can be used to remove leading spaces, tabs, etc.
+This is a destructive operation as it modifies the string.
+
+\param str is the string that will be trimmed.
+\param rmlist contains the set of characters that will be removed.
+\returns A pointer to \c str itself.
+\sa gk_strtprune()
+*/
+/*************************************************************************/
+char *gk_strhprune(char *str, char *rmlist)
+{
+  /* length of the leading run made only of characters from rmlist */
+  size_t skip = strspn(str, rmlist);
+
+  /* slide the remainder (including its '\0') to the front of the buffer */
+  if (skip > 0) 
+    memmove(str, str+skip, strlen(str+skip)+1);
+
+  return str;
+}
+
+
+/************************************************************************/
+/*! \brief Converts a string to upper case.
+
+This function converts a string to upper case. This operation modifies the 
+string itself.
+
+\param str is the string whose case will be changed.
+\returns A pointer to \c str itself.
+\sa gk_strtolower()
+*/
+/*************************************************************************/
+char *gk_strtoupper(char *str)
+{
+  size_t i;
+
+  /* toupper() is only defined for EOF and values representable as an
+     unsigned char, so a (possibly negative) plain char must be converted
+     before it is passed in */
+  for (i=0; str[i]!='\0'; i++)
+    str[i] = (char)toupper((unsigned char)str[i]);
+
+  return str;
+}
+
+
+/************************************************************************/
+/*! \brief Converts a string to lower case.
+
+This function converts a string to lower case. This operation modifies the 
+string itself.
+
+\param str is the string whose case will be changed.
+\returns A pointer to \c str itself.
+\sa gk_strtoupper()
+*/
+/*************************************************************************/
+char *gk_strtolower(char *str)
+{
+  size_t i;
+
+  /* tolower() is only defined for EOF and values representable as an
+     unsigned char, so a (possibly negative) plain char must be converted
+     before it is passed in */
+  for (i=0; str[i]!='\0'; i++)
+    str[i] = (char)tolower((unsigned char)str[i]);
+
+  return str;
+}
+
+
+/************************************************************************/
+/*! \brief Duplicates a string
+
+This function is a replacement for C's standard <em>strdup()</em> function.
+The key differences between the two are that gk_strdup():
+  - uses the dynamic memory allocation routines of \e GKlib. 
+  - it correctly handles NULL input strings.
+
+The string that is returned must be freed by gk_free().
+
+\param orgstr is the string that will be duplicated.
+\returns A pointer to the newly created string.
+\sa gk_free()
+*/
+/*************************************************************************/
+char *gk_strdup(char *orgstr)
+{
+  size_t len;   /* size_t, so that very long strings are not truncated */
+  char *str=NULL;
+
+  /* a NULL input yields a NULL output */
+  if (orgstr != NULL) {
+    len = strlen(orgstr)+1;   /* +1 for the terminating '\0' */
+    str = gk_malloc(len*sizeof(char), "gk_strdup: str");
+    memcpy(str, orgstr, len);
+  }
+
+  return str;
+}
+
+
+/************************************************************************/
+/*! \brief Case insensitive string comparison.
+
+This function compares two strings for equality by ignoring the case of the
+strings. 
+
+\warning This function is \b not equivalent to a case-insensitive 
+         <em>strcmp()</em> function, as it does not return ordering 
+         information.
+
+\todo Remove the above warning.
+
+\param s1 is the first string to be compared.
+\param s2 is the second string to be compared.
+\retval 1 if the strings are identical,
+\retval 0 otherwise.
+*/
+/*************************************************************************/
+int gk_strcasecmp(char *s1, char *s2)
+{
+  size_t i;
+
+  /* Compare the two strings character by character up to the end of the 
+     shorter one. The casts are required because tolower() is undefined 
+     for negative values other than EOF. */
+  for (i=0; s1[i] != '\0' && s2[i] != '\0'; i++) {
+    if (tolower((unsigned char)s1[i]) != tolower((unsigned char)s2[i]))
+      return 0;
+  }
+
+  /* At least one string ended; they are equal only if both did */
+  return (s1[i] == s2[i] ? 1 : 0);
+}
+
+
+/************************************************************************/
+/*! \brief Compare two strings in reverse order
+
+This function is similar to strcmp but it performs the comparison as
+if the two strings were reversed.
+
+\param s1 is the first string to be compared.
+\param s2 is the second string to be compared.
+\retval -1, 0, 1, if the s1 < s2, s1 == s2, or s1 > s2.
+*/
+/*************************************************************************/
+int gk_strrcmp(char *s1, char *s2)
+{
+  /* number of not-yet-compared characters at the front of each string */
+  size_t n1 = strlen(s1);
+  size_t n2 = strlen(s2);
+
+  /* compare from the last character towards the first */
+  while (n1 > 0 && n2 > 0) {
+    n1--;
+    n2--;
+    if (s1[n1] != s2[n2])
+      return (s1[n1] < s2[n2] ? -1 : 1);
+  }
+
+  /* One string is a suffix of the other (or they are identical); the one
+     that ran out of characters first is the smaller one. */
+  if (n1 < n2)
+    return -1;
+  if (n1 > n2)
+    return 1;
+  return 0;
+}
+
+
+
+/************************************************************************/
+/*! \brief Converts a time_t time into a string 
+
+This function takes a time_t-specified time and returns a string-formated
+representation of the corresponding time. The format of the string is
+<em>mm/dd/yyyy hh:mm:ss</em>, in which the hours are in military time.
+
+\param time is the time to be converted.
+\return It returns a pointer to a statically allocated string that is 
+        over-written in successive calls of this function. If the 
+        conversion failed, it returns NULL.
+
+*/
+/*************************************************************************/
+char *gk_time2str(time_t time)
+{
+  static char datestr[128];  /* overwritten by every call */
+  struct tm *tm;
+
+  tm = localtime(&time);
+
+  /* localtime() returns NULL when the time cannot be converted; passing
+     that on to strftime() would be undefined behavior */
+  if (tm == NULL)
+    return NULL;
+
+  if (strftime(datestr, sizeof(datestr), "%m/%d/%Y %H:%M:%S", tm) == 0)
+    return NULL;
+  else
+    return datestr;
+}
+
+
+
+#if !defined(WIN32) && !defined(__MINGW32__)
+/************************************************************************/
+/*! \brief Converts a date/time string into its equivalent time_t value
+
+This function takes date and/or time specification and converts it in
+the equivalent time_t representation. The conversion is done using the
+strptime() function. The format that gk_str2time() understands is
+<em>mm/dd/yyyy hh:mm:ss</em>, in which the hours are in military time.
+
+\param str is the date/time string to be converted.
+\return If the conversion was successful it returns the time. If the string
+        cannot be parsed by strptime() it returns -1, and if mktime() 
+        produces a negative value it returns 0.
+*/
+/*************************************************************************/
+time_t gk_str2time(char *str)
+{
+  struct tm parsed;
+  time_t secs;
+
+  /* start from an all-zero broken-down time; strptime() only fills in the
+     fields that appear in the format */
+  memset(&parsed, 0, sizeof(parsed));
+
+  if (strptime(str, "%m/%d/%Y %H:%M:%S", &parsed) == NULL)
+    return -1;   /* the string does not follow the expected format */
+
+  secs = mktime(&parsed);
+
+  /* negative results of mktime() are reported as 0 */
+  if (secs < 0)
+    return 0;
+
+  return secs;
+}
+#endif
+
+
+/*************************************************************************
+* This function returns the ID of a particular string based on the 
+* supplied StringMap array
+**************************************************************************/
+int gk_GetStringID(gk_StringMap_t *strmap, char *key)
+{
+  gk_StringMap_t *entry;
+
+  /* the map is terminated by an entry whose name is NULL; names are
+     matched with gk_strcasecmp(), i.e., ignoring case */
+  for (entry=strmap; entry->name != NULL; entry++) {
+    if (gk_strcasecmp(key, entry->name) != 0)
+      return entry->id;
+  }
+
+  /* key is not in the map */
+  return -1;
+}
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
new file mode 100644
index 0000000..8584820
--- /dev/null
+++ b/test/CMakeLists.txt
@@ -0,0 +1,19 @@
+# Test and utility programs that are linked against GKlib.
+
+# Programs whose single source file is named after the target:
+set(gklib_test_progs
+  strings gksort fis gkgraph csrcnv
+  grKx m2mnbrs cmpnbrs splatt2svd gkuniq)
+
+foreach(prog ${gklib_test_progs})
+  add_executable(${prog} ${prog}.c)
+  target_link_libraries(${prog} GKlib)
+endforeach()
+
+# gkrw is the one exception: it is built from rw.c.
+add_executable(gkrw rw.c)
+target_link_libraries(gkrw GKlib)
+
+# Install a subset of them
+install(TARGETS csrcnv
+  RUNTIME DESTINATION bin)
diff --git a/test/cmpnbrs.c b/test/cmpnbrs.c
new file mode 100644
index 0000000..6e3ace8
--- /dev/null
+++ b/test/cmpnbrs.c
@@ -0,0 +1,301 @@
+/*!
+\file  
+\brief It takes as input two CSR matrices A and B and computes how
+       similar AA' and A'A are to BB' and B'B, respectively in terms
+       of the cosine similarity of the corresponding rows.
+
+\date 11/09/2015
+\author George
+\version \verbatim $Id: m2mnbrs.c 17699 2014-09-27 18:05:31Z karypis $ \endverbatim
+*/
+
+#include <GKlib.h>
+
+/*************************************************************************/
+/*! Data structures for the code */
+/*************************************************************************/
+typedef struct {
+  int simtype;             /*!< The similarity type to use */
+  int verbosity;           /*!< The reporting verbosity level */
+
+  char *afile;             /*!< The file storing the first CSR matrix (A) */
+  char *bfile;             /*!< The file storing the second CSR matrix (B) */
+
+  /* timers */
+  double timer_global;     /*!< Wall-clock timer that main() runs around both similarity computations */
+} params_t;
+
+
+/*************************************************************************/
+/*! Constants */
+/*************************************************************************/
+/* Version triple printed in the banner by main() */
+#define VER_MAJOR           0
+#define VER_MINOR           1
+#define VER_SUBMINOR        0
+
+/* Command-line option codes that parse_cmdline() dispatches on */
+#define CMD_SIMTYPE         10
+#define CMD_VERBOSITY       70
+#define CMD_HELP            100
+
+/* Text labels that main() prints via simtypenames[params->simtype]; NOTE(review): confirm the order matches the GK_CSR_* values */
+static char simtypenames[][10] = {"", "dotp", "cos", "jac", ""};
+
+
+/*************************************************************************/
+/*! Option tables used by parse_cmdline() */
+/*************************************************************************/
+static struct gk_option long_options[] = {   /* the fourth field of each entry is a CMD_* code */
+  {"simtype",           1,      0,      CMD_SIMTYPE},
+  {"verbosity",         1,      0,      CMD_VERBOSITY},
+
+  {"help",              0,      0,      CMD_HELP},
+  {0,                   0,      0,      0}
+};
+
+static gk_StringMap_t simtype_options[] = {  /* -simtype strings and their ids, looked up with gk_GetStringID() */
+  {"dotp",               GK_CSR_DOTP},
+  {"cos",                GK_CSR_COS},
+  {"jac",                GK_CSR_JAC},
+  {NULL,                 0}                  /* NULL name ends the gk_GetStringID() scan */
+};
+
+
+/*-------------------------------------------------------------------
+ * Mini help
+ *-------------------------------------------------------------------*/
+static char helpstr[][100] =
+{
+" ",
+"Usage: cmpnbrs [options] afile bfile",
+" ",
+" Options",
+"  -simtype=string",
+"     Specifies the type of similarity to use. Possible values are:",
+"       dotp  - Dot-product similarity [default]",
+"       cos   - Cosine similarity",
+"       jac   - Jaccard similarity",
+" ",
+"  -verbosity=int",
+"     Specifies the level of debugging information to be displayed.",
+"     Default value is 0.",
+" ",
+"  -help",
+"     Prints this message.",
+""  /* the empty string ends the list; the -help loop stops on it */
+};
+
+
+
+/*************************************************************************/
+/*! Function prototypes */
+/*************************************************************************/
+params_t *parse_cmdline(int argc, char *argv[]);
+double ComputeNeighborhoodSimilarity(params_t *params, gk_csr_t *amat, gk_csr_t *bmat);
+
+
+/*************************************************************************/
+/*! This is the entry point of the command-line argument parser */
+/*************************************************************************/
+params_t *parse_cmdline(int argc, char *argv[])
+{
+  int opt, optidx, iline;
+  params_t *params;
+
+  params = (params_t *)gk_malloc(sizeof(params_t), "parse_cmdline: params");
+
+  /* defaults that stay in effect unless overridden on the command line */
+  params->afile     = NULL;
+  params->bfile     = NULL;
+  params->simtype   = GK_CSR_DOTP;
+  params->verbosity = -1;
+
+  /* consume the options one at a time; -1 signals that none are left */
+  for (;;) {
+    opt = gk_getopt_long_only(argc, argv, "", long_options, &optidx);
+    if (opt == -1)
+      break;
+
+    if (opt == CMD_SIMTYPE) {
+      if (gk_optarg) {
+        params->simtype = gk_GetStringID(simtype_options, gk_optarg);
+        if (params->simtype == -1)
+          errexit("Invalid simtype of %s.\n", gk_optarg);
+      }
+    }
+    else if (opt == CMD_VERBOSITY) {
+      if (gk_optarg)
+        params->verbosity = atoi(gk_optarg);
+    }
+    else if (opt == CMD_HELP) {
+      for (iline=0; helpstr[iline][0] != '\0'; iline++)
+        printf("%s\n", helpstr[iline]);
+      exit(EXIT_SUCCESS);
+    }
+    else {  /* '?' or any other unexpected code */
+      printf("Illegal command-line option(s)\nUse %s -help for a summary of the options.\n", argv[0]);
+      exit(EXIT_FAILURE);
+    }
+  }
+
+  /* exactly two positional arguments must remain: afile and bfile */
+  if (argc-gk_optind != 2) {
+    printf("Missing input file info.\n  Use %s -help for a summary of the options.\n", argv[0]);
+    exit(EXIT_FAILURE);
+  }
+
+  params->afile = gk_strdup(argv[gk_optind++]);
+  params->bfile = gk_strdup(argv[gk_optind++]);
+
+  /* both inputs are checked for existence before returning */
+  if (!gk_fexists(params->afile))
+    errexit("input file %s does not exist.\n", params->afile);
+  if (!gk_fexists(params->bfile))
+    errexit("input file %s does not exist.\n", params->bfile);
+
+  return params;
+}
+
+
+/*************************************************************************/
+/*! This is the entry point of the program */
+/**************************************************************************/
+int main(int argc, char *argv[])
+{
+  static const char banner[] =
+    "********************************************************************************\n";
+  params_t *params;
+  gk_csr_t *amat, *bmat, *amatt, *bmatt;
+  double rowsim, colsim;
+
+  params = parse_cmdline(argc, argv);
+  amat = gk_csr_Read(params->afile, GK_CSR_FMT_CSR, 1, 0);
+  bmat = gk_csr_Read(params->bfile, GK_CSR_FMT_CSR, 1, 0);
+
+  /* the row counts must agree; both matrices get the same ncols (if necessary) */
+  GKASSERT(amat->nrows == bmat->nrows);
+  amat->ncols = bmat->ncols = gk_max(amat->ncols, bmat->ncols);
+
+  /* the transposes feed the A'A vs B'B comparison below */
+  amatt = gk_csr_Transpose(amat);
+  bmatt = gk_csr_Transpose(bmat);
+
+  printf("%s", banner);
+  printf("cmpnbrs (%d.%d.%d) Copyright 2015, GK.\n", VER_MAJOR, VER_MINOR, VER_SUBMINOR);
+  printf("  simtype=%s\n", simtypenames[params->simtype]);
+  printf("  afile=%s, nrows=%d, ncols=%d, nnz=%zd\n",
+      params->afile, amat->nrows, amat->ncols, amat->rowptr[amat->nrows]);
+  printf("  bfile=%s, nrows=%d, ncols=%d, nnz=%zd\n",
+      params->bfile, bmat->nrows, bmat->ncols, bmat->rowptr[bmat->nrows]);
+
+  gk_clearwctimer(params->timer_global);
+  gk_startwctimer(params->timer_global);
+
+  rowsim = ComputeNeighborhoodSimilarity(params, amat, bmat);
+  printf("SIM(AA', BB'): %.5lf\t", rowsim);
+  colsim = ComputeNeighborhoodSimilarity(params, amatt, bmatt);
+  printf("SIM(A'A, B'B): %.5lf\n", colsim);
+
+  gk_stopwctimer(params->timer_global);
+  printf("    wclock: %.2lfs\n", gk_getwctimer(params->timer_global));
+  printf("%s", banner);
+
+  gk_csr_Free(&amat);
+  gk_csr_Free(&bmat);
+  gk_csr_Free(&amatt);
+  gk_csr_Free(&bmatt);
+
+  exit(EXIT_SUCCESS);
+}
+
+
+/*************************************************************************/
+/*! Compares the neighbors of AA' vs BB'. For every row that is non-empty in
+    both matrices, the similarity lists that gk_csr_GetSimilarRows() returns
+    for A and B are compared via their cosine; the average is returned. */
+/**************************************************************************/
+double ComputeNeighborhoodSimilarity(params_t *params, gk_csr_t *amat, 
+           gk_csr_t *bmat)
+{
+  int iR, iH, nahits, nbhits, ncmps;
+  int32_t *marker;
+  gk_fkv_t *ahits, *bhits, *cand;
+  double tabsim, abdot, anorm2, bnorm2, *avec, *bvec;
+
+  /* if cosine, make rows unit length */
+  if (params->simtype == GK_CSR_COS) {
+    gk_csr_Normalize(amat, GK_CSR_ROW, 2);
+    gk_csr_Normalize(bmat, GK_CSR_ROW, 2);
+  }
+
+  /* create the inverted index */
+  gk_csr_CreateIndex(amat, GK_CSR_COL);
+  gk_csr_CreateIndex(bmat, GK_CSR_COL);
+
+  /* compute the row squared norms */
+  gk_csr_ComputeSquaredNorms(amat, GK_CSR_ROW);
+  gk_csr_ComputeSquaredNorms(bmat, GK_CSR_ROW);
+
+  /* allocate memory for the necessary working arrays */
+  ahits  = gk_fkvmalloc(amat->nrows, "ComputeNeighborhoodSimilarity: ahits");
+  bhits  = gk_fkvmalloc(bmat->nrows, "ComputeNeighborhoodSimilarity: bhits");
+  marker = gk_i32smalloc(amat->nrows, -1, "ComputeNeighborhoodSimilarity: marker");
+  cand   = gk_fkvmalloc(amat->nrows, "ComputeNeighborhoodSimilarity: cand");
+  avec   = gk_dsmalloc(amat->nrows, 0.0, "ComputeNeighborhoodSimilarity: avec");
+  bvec   = gk_dsmalloc(bmat->nrows, 0.0, "ComputeNeighborhoodSimilarity: bvec");
+
+  /* find the best neighbors for each row in the two matrices and compute 
+     the cosine similarity between them. */
+  tabsim = 0.0;
+  ncmps  = 0;
+  for (iR=0; iR<amat->nrows; iR++) {
+    if (params->verbosity > 1)
+      printf("Working on row %7d\n", iR);
+    /* rows that are empty in either matrix are not compared */
+    if (amat->rowptr[iR+1]-amat->rowptr[iR] == 0 ||
+        bmat->rowptr[iR+1]-bmat->rowptr[iR] == 0)
+      continue;
+
+    nahits = gk_csr_GetSimilarRows(amat, 
+                 amat->rowptr[iR+1]-amat->rowptr[iR], 
+                 amat->rowind+amat->rowptr[iR], 
+                 amat->rowval+amat->rowptr[iR], 
+                 params->simtype, amat->nrows, 0.0,
+                 ahits, marker, cand);
+
+    nbhits = gk_csr_GetSimilarRows(bmat, 
+                 bmat->rowptr[iR+1]-bmat->rowptr[iR], 
+                 bmat->rowind+bmat->rowptr[iR], 
+                 bmat->rowval+bmat->rowptr[iR], 
+                 params->simtype, bmat->nrows, 0.0,
+                 bhits, marker, cand);
+
+    if (params->verbosity > 0)
+      printf("Row %7d %7d %7d %8zd %8zd\n", iR, nahits, nbhits, 
+          amat->rowptr[iR+1]-amat->rowptr[iR], bmat->rowptr[iR+1]-bmat->rowptr[iR]);
+    /* scatter each hit's key into the dense vector at position val */
+    for (iH=0; iH<nahits; iH++) 
+      avec[ahits[iH].val] = ahits[iH].key;
+    for (iH=0; iH<nbhits; iH++) 
+      bvec[bhits[iH].val] = bhits[iH].key;
+
+    for (abdot=anorm2=bnorm2=0.0, iH=0; iH<amat->nrows; iH++) {
+      abdot  += avec[iH]*bvec[iH];
+      anorm2 += avec[iH]*avec[iH];
+      bnorm2 += bvec[iH]*bvec[iH];
+    }
+    tabsim += (abdot > 0 ? abdot/sqrt(anorm2*bnorm2) : 0.0);
+    ncmps++;
+    /* zero only the touched entries so both vectors are clean for the next row */
+    for (iH=0; iH<nahits; iH++) 
+      avec[ahits[iH].val] = 0.0;
+    for (iH=0; iH<nbhits; iH++) 
+      bvec[bhits[iH].val] = 0.0;
+  }
+
+  gk_free((void **)&ahits, &bhits, &marker, &cand, &avec, &bvec, LTERM);
+  /* no comparable rows: return 0.0 rather than the NaN that 0.0/0 produces */
+  return (ncmps > 0 ? tabsim/ncmps : 0.0);
+}
+
diff --git a/test/csrcnv.c b/test/csrcnv.c
new file mode 100644
index 0000000..aef808e
--- /dev/null
+++ b/test/csrcnv.c
@@ -0,0 +1,397 @@
+/*!
+\file  
+\brief A simple program to convert between different matrix formats that are supported
+       by the gk_csr_Read/gk_csr_Write functions.
+
+\date 5/30/2013
+\author George
+\version \verbatim $Id: csrcnv.c 15314 2013-10-05 16:50:50Z karypis $ \endverbatim
+*/
+
+#include <GKlib.h>
+
+/*************************************************************************/
+/*! Data structures for the code */
+/*************************************************************************/
+typedef struct {
+  int inf, outf;    /* input/output format codes given to gk_csr_Read()/gk_csr_Write() */
+  int numbering;    /* 1 when -numone was given, 0 otherwise; passed to gk_csr_Read() */
+  int readvals;     /* 0 when -noreadvals was given, 1 otherwise */
+  int writevals;    /* 0 when -nowritevals was given, 1 otherwise */
+  int rshuf, cshuf; /* random shuffle of rows/columns */
+  int symmetric;    /* a symmetric shuffle */
+  int mincolfreq;   /* column pruning lower bound; -1 when not specified */
+  int maxcolfreq;   /* column pruning upper bound; -1 when not specified */
+  int minrowfreq;   /* row pruning lower bound; -1 when not specified */
+  int maxrowfreq;   /* row pruning upper bound; -1 when not specified */
+  float rownrmfltr; /* row-lowfilter threshold; the filter runs only when it is >= 0 */
+  int compactcols;  /* if to renumber columns to eliminate empty ones */
+  int transpose;    /* transpose the output matrix */
+  char *srenumber;  /* the iperm file for the symmetric renumbering; NULL when not given */
+  char *infile;     /* input file */
+  char *outfile;    /* output file */
+} params_t;
+
+
+/*************************************************************************/
+/*! Constants */
+/*************************************************************************/
+#define CMD_NUMONE        1   /* CMD_* are the option codes that parse_cmdline() switches on */
+#define CMD_NOREADVALS    2
+#define CMD_NOWRITEVALS   3
+#define CMD_RSHUF         4
+#define CMD_CSHUF         5
+#define CMD_SYMMETRIC     6
+#define CMD_MINCOLFREQ    7
+#define CMD_MAXCOLFREQ    8
+#define CMD_MINROWFREQ    9
+#define CMD_MAXROWFREQ    10
+#define CMD_ROWNRMFLTR    11
+#define CMD_COMPACTCOLS   12
+#define CMD_TRANSPOSE     13
+#define CMD_SRENUMBER     14
+#define CMD_HELP          100
+
+
+/*************************************************************************/
+/*! Option table handed to gk_getopt_long_only() by parse_cmdline() */
+/*************************************************************************/
+static struct gk_option long_options[] = {   /* the fourth field of each entry is a CMD_* code */
+  {"numone",      0,      0,      CMD_NUMONE},
+  {"noreadvals",  0,      0,      CMD_NOREADVALS},
+  {"nowritevals", 0,      0,      CMD_NOWRITEVALS},
+  {"rshuf",       0,      0,      CMD_RSHUF},
+  {"cshuf",       0,      0,      CMD_CSHUF},
+  {"symmetric",   0,      0,      CMD_SYMMETRIC},
+  {"mincolfreq",  1,      0,      CMD_MINCOLFREQ},
+  {"maxcolfreq",  1,      0,      CMD_MAXCOLFREQ},
+  {"minrowfreq",  1,      0,      CMD_MINROWFREQ},
+  {"maxrowfreq",  1,      0,      CMD_MAXROWFREQ},
+  {"rownrmfltr",  1,      0,      CMD_ROWNRMFLTR},
+  {"compactcols", 0,      0,      CMD_COMPACTCOLS},
+  {"transpose",   0,      0,      CMD_TRANSPOSE},
+  {"srenumber",   1,      0,      CMD_SRENUMBER},
+  {"help",        0,      0,      CMD_HELP},
+  {0,             0,      0,      0}
+};
+
+
+/*-------------------------------------------------------------------*/
+/* Mini help  */
+/*-------------------------------------------------------------------*/
+static char helpstr[][100] = {   /* printed line by line for -help; the empty string ends the list */
+" ",
+"Usage: csrcnv [options] <infile> <inf> <outfile> <outf>",
+" ",
+" Required parameters",
+"  infile, outfile",
+"     The name of the input/output CSR file.",
+" ",
+"  inf/outf",
+"     The format of the input/output file.",
+"     Supported values are:",
+"        1  GK_CSR_FMT_CLUTO",
+"        2  GK_CSR_FMT_CSR",
+"        3  GK_CSR_FMT_METIS",
+"        4  GK_CSR_FMT_BINROW",
+"        6  GK_CSR_FMT_IJV",
+"        7  GK_CSR_FMT_BIJV",
+" ",
+" Optional parameters",
+"  -numone",
+"     Specifies that the numbering of the input file starts from 1. ",
+"     It only applies to CSR/IJV formats.",
+" ",
+"  -nowritevals",
+"     Specifies that no values will be output.",
+" ",
+"  -noreadvals",
+"     Specifies that the values will not be read when applicable.",
+" ",
+"  -rshuf",
+"     Specifies that the rows will be randomly shuffled prior to output.",
+" ",
+"  -cshuf",
+"     Specifies that the columns will be randomly shuffled prior to output.",
+" ",
+"  -symmetric",
+"     Specifies that the row+column shuffling will be symmetric.",
+" ",
+"  -mincolfreq=int",
+"     Used to prune infrequent columns.",
+" ",
+"  -maxcolfreq=int",
+"     Used to prune frequent columns.",
+" ",
+"  -minrowfreq=int",
+"     Used to prune infrequent rows.",
+" ",
+"  -maxrowfreq=int",
+"     Used to prune frequent rows.",
+" ",
+"  -rownrmfltr=float",
+"     The parameter to use for the row-wise low filter.",
+" ",
+"  -compactcols",
+"     Specifies if empty columns will be removed and the columns renumbered.",
+" ",
+"  -transpose",
+"     Specifies that the transposed matrix will be written.",
+" ",
+"  -srenumber=iperm-file",
+"     Performs a symmetric renumbering based on the provided iperm file.",
+" ",
+"  -help",
+"     Prints this message.",
+""
+};
+
+static char shorthelpstr[][100] = {   /* short usage printed by parse_cmdline(); "" ends the list */
+" ",
+"   Usage: csrcnv [options] <infile> <inf> <outfile> <outf>",
+"          use 'csrcnv -help' for a summary of the options.",
+""
+};
+ 
+
+/*************************************************************************/
+/*! This is the entry point of the command-line argument parser */
+/*************************************************************************/
+params_t *parse_cmdline(int argc, char *argv[])
+{
+  int i;
+  int c, option_index;
+  params_t *params;
+
+  params = (params_t *)gk_malloc(sizeof(params_t), "parse_cmdline: params");
+
+  /* initialize the params data structure */
+  params->numbering = 0;
+  params->readvals  = 1;
+  params->writevals = 1;
+  params->rshuf     = 0;
+  params->cshuf     = 0;
+  params->symmetric = 0;
+  params->transpose = 0;
+  params->srenumber = NULL;
+
+  params->mincolfreq  = -1;
+  params->minrowfreq  = -1;
+  params->maxcolfreq  = -1;
+  params->maxrowfreq  = -1;
+  params->rownrmfltr  = -1;
+  params->compactcols = 0;
+
+  params->inf       = -1;
+  params->outf      = -1;
+  params->infile    = NULL;
+  params->outfile   = NULL;
+
+  /* Parse the command line arguments; the loop ends when
+     gk_getopt_long_only() returns -1 */
+  while ((c = gk_getopt_long_only(argc, argv, "", long_options, &option_index)) != -1) {
+    switch (c) {
+      case CMD_NUMONE:
+        params->numbering = 1;
+        break;
+      case CMD_NOREADVALS:
+        params->readvals = 0;
+        break;
+      case CMD_NOWRITEVALS:
+        params->writevals = 0;
+        break;
+      case CMD_RSHUF:
+        params->rshuf = 1;
+        break;
+      case CMD_CSHUF:
+        params->cshuf = 1;
+        break;
+      case CMD_SYMMETRIC:
+        params->symmetric = 1;
+        break;
+      case CMD_TRANSPOSE:
+        params->transpose = 1;
+        break;
+
+      /* options that carry a value in gk_optarg */
+      case CMD_MINCOLFREQ:
+        if (gk_optarg) params->mincolfreq = atoi(gk_optarg);
+        break;
+      case CMD_MINROWFREQ:
+        if (gk_optarg) params->minrowfreq = atoi(gk_optarg);
+        break;
+      case CMD_MAXCOLFREQ:
+        if (gk_optarg) params->maxcolfreq = atoi(gk_optarg);
+        break;
+      case CMD_MAXROWFREQ:
+        if (gk_optarg) params->maxrowfreq = atoi(gk_optarg);
+        break;
+      case CMD_ROWNRMFLTR:
+        if (gk_optarg) params->rownrmfltr = atof(gk_optarg);
+        break;
+      case CMD_COMPACTCOLS:
+        params->compactcols = 1;
+        break;
+
+      case CMD_SRENUMBER:
+        if (gk_optarg) {
+          params->srenumber = gk_strdup(gk_optarg);
+          if (!gk_fexists(params->srenumber))
+            errexit("srenumber file %s does not exist.\n", params->srenumber);
+        }
+        break;
+
+      case CMD_HELP:
+        for (i=0; strlen(helpstr[i]) > 0; i++)
+          printf("%s\n", helpstr[i]);
+        exit(EXIT_SUCCESS);
+        break;
+      case '?':
+      default:
+        printf("Illegal command-line option(s)\nUse %s -help for a summary of the options.\n", argv[0]);
+        exit(EXIT_FAILURE);  /* bad usage is reported as a failure to the caller */
+    }
+  }
+
+  if (argc-gk_optind != 4) {  /* infile, inf, outfile, outf must all be present */
+    printf("Unrecognized parameters.");
+    for (i=0; strlen(shorthelpstr[i]) > 0; i++)
+      printf("%s\n", shorthelpstr[i]);
+    exit(EXIT_FAILURE);
+  }
+
+  params->infile  = gk_strdup(argv[gk_optind++]);
+  params->inf     = atoi(argv[gk_optind++]);
+  params->outfile = gk_strdup(argv[gk_optind++]);
+  params->outf    = atoi(argv[gk_optind++]);
+
+  if (!gk_fexists(params->infile))
+    errexit("input file %s does not exist.\n", params->infile);
+
+  return params;
+}
+
+
+/*************************************************************************/
+/*! the entry point */
+/**************************************************************************/
+int main(int argc, char *argv[])
+{
+  int what;
+  params_t *params;
+  gk_csr_t *mat, *mat1, *smat;
+
+  /* get command-line options */
+  params = parse_cmdline(argc, argv);
+
+  /* read the data */
+  mat = gk_csr_Read(params->infile, params->inf, params->readvals, params->numbering);
+
+  /* column pruning: a bound that was not given defaults to 0 (min) or nrows (max) */
+  if (params->mincolfreq != -1 || params->maxcolfreq != -1) {
+    params->mincolfreq = (params->mincolfreq == -1 ? 0 : params->mincolfreq);
+    params->maxcolfreq = (params->maxcolfreq == -1 ? mat->nrows : params->maxcolfreq);
+
+    printf("Column prune: %d %d; nnz: %zd => ", 
+        params->mincolfreq, params->maxcolfreq, mat->rowptr[mat->nrows]);
+    mat1 = gk_csr_Prune(mat, GK_CSR_COL, params->mincolfreq, params->maxcolfreq);
+    gk_csr_Free(&mat);
+    mat = mat1;
+    mat1 = NULL;
+
+    printf("%zd\n", mat->rowptr[mat->nrows]);
+  }
+
+  if (params->minrowfreq != -1 || params->maxrowfreq != -1) {  /* row pruning; max defaults to ncols */
+    params->minrowfreq = (params->minrowfreq == -1 ? 0 : params->minrowfreq);
+    params->maxrowfreq = (params->maxrowfreq == -1 ? mat->ncols : params->maxrowfreq);
+
+    printf("Row prune: %d %d; nnz: %zd => ", 
+        params->minrowfreq, params->maxrowfreq, mat->rowptr[mat->nrows]);
+    mat1 = gk_csr_Prune(mat, GK_CSR_ROW, params->minrowfreq, params->maxrowfreq);
+    gk_csr_Free(&mat);
+    mat = mat1;
+    mat1 = NULL;
+
+    printf("%zd\n", mat->rowptr[mat->nrows]);
+  }
+
+  if (params->rownrmfltr >= 0.0) {
+    /* the matrix produced by gk_csr_LowFilter() is passed through
+       gk_csr_Normalize() before it replaces mat */
+
+    printf("Row low filter: %f; nnz: %zd => ", params->rownrmfltr, mat->rowptr[mat->nrows]);
+    mat1 = gk_csr_LowFilter(mat, GK_CSR_ROW, 2, params->rownrmfltr);
+    gk_csr_Normalize(mat1, GK_CSR_ROW, 2);
+
+    gk_csr_Free(&mat);
+    mat = mat1;
+    mat1 = NULL;
+
+    printf("%zd\n", mat->rowptr[mat->nrows]);
+  }
+
+  if (params->compactcols) {
+    printf("Compacting columns: %d => ", mat->ncols);
+    gk_csr_CompactColumns(mat);
+    printf("%d\n", mat->ncols);
+  }
+
+  /* random shuffling of the rows, the columns, or both */
+  if (params->rshuf || params->cshuf) {
+    if (params->rshuf && params->cshuf)
+      what = GK_CSR_ROWCOL;
+    else if (params->rshuf)
+      what = GK_CSR_ROW;
+    else
+      what = GK_CSR_COL;
+
+    smat = gk_csr_Shuffle(mat, what, params->symmetric);
+    gk_csr_Free(&mat);
+    mat = smat;
+  }
+
+  /* symmetric renumbering driven by the permutation stored in the srenumber file */
+  if (params->srenumber) {
+    int32_t i;
+    size_t nlines;
+    int32_t *iperm;
+
+    iperm = gk_i32readfile(params->srenumber, &nlines);
+    if (nlines != mat->nrows && nlines != mat->ncols)
+      errexit("The nlines=%zu of srenumber file does not match nrows: %d, ncols: %d\n", nlines, mat->nrows, mat->ncols);
+
+    /* reject a permutation whose largest id is >= nlines while its smallest is <= 0 */
+    if (gk_i32max(nlines, iperm, 1) >= nlines && gk_i32min(nlines, iperm, 1) <= 0) 
+      errexit("The srenumber iperm seems to be wrong.\n");
+
+    if (gk_i32max(nlines, iperm, 1) == nlines) { /* need to renumber */
+      for (i=0; i<nlines; i++)
+        iperm[i]--;
+    }
+
+    smat = gk_csr_ReorderSymmetric(mat, iperm, NULL);
+    gk_csr_Free(&mat);
+    mat = smat;
+
+    gk_free((void **)&iperm, LTERM);
+  }
+
+  if (params->writevals && mat->rowval == NULL)  /* no values present: use 1.0 for every entry */
+    mat->rowval = gk_fsmalloc(mat->rowptr[mat->nrows], 1.0, "mat->rowval");
+
+  if (params->transpose) {
+    mat1 = gk_csr_Transpose(mat);
+    gk_csr_Free(&mat);
+    mat = mat1;
+    mat1 = NULL;
+  }
+
+  /* write the resulting matrix in the requested output format */
+  gk_csr_Write(mat, params->outfile, params->outf, params->writevals, 0);
+
+  gk_csr_Free(&mat);
+
+  return EXIT_SUCCESS;
+}
+
diff --git a/test/fis.c b/test/fis.c
new file mode 100644
index 0000000..084a4b6
--- /dev/null
+++ b/test/fis.c
@@ -0,0 +1,286 @@
+/*!
+\file  
+\brief A simple frequent itemset discovery program to test GKlib's routines
+
+\date 6/12/2008
+\author George
+\version \verbatim $Id: fis.c 11075 2011-11-11 22:31:52Z karypis $ \endverbatim
+*/
+
+#include <GKlib.h>
+
+/*************************************************************************/
+/*! Data structures for the code */
+/*************************************************************************/
+typedef struct {
+  ssize_t minlen, maxlen;  /* pattern length bounds (-minlen/-maxlen); defaults are 1 and -1 */
+  ssize_t minfreq, maxfreq;  /* pattern frequency bounds (-minfreq/-maxfreq); defaults are 10 and -1 */
+  char *filename;  /* the transaction matrix file (the positional argument) */
+  int silent;  /* when non-zero, print_an_itemset() only counts and prints nothing */
+  ssize_t nitemsets;  /* number of itemsets reported so far; reset to 0 in main() */
+  char *clabelfile;  /* column-label file given with -clabels; NULL when absent */
+  char **clabels;  /* one label string per matrix column, built in main() */
+} params_t;
+
+/*************************************************************************/
+/*! Constants */
+/*************************************************************************/
+#define CMD_MINLEN      1   /* CMD_* are the option codes that parse_cmdline() switches on */
+#define CMD_MAXLEN      2
+#define CMD_MINFREQ     3
+#define CMD_MAXFREQ     4
+#define CMD_SILENT      5
+#define CMD_CLABELFILE  6
+#define CMD_HELP        10
+
+
+/*************************************************************************/
+/*! Option table handed to gk_getopt_long_only() by parse_cmdline() */
+/*************************************************************************/
+static struct gk_option long_options[] = {   /* the fourth field of each entry is a CMD_* code */
+  {"minlen",        1,      0,      CMD_MINLEN},
+  {"maxlen",        1,      0,      CMD_MAXLEN},
+  {"minfreq",       1,      0,      CMD_MINFREQ},
+  {"maxfreq",       1,      0,      CMD_MAXFREQ},
+  {"silent",        0,      0,      CMD_SILENT},
+  {"clabels",       1,      0,      CMD_CLABELFILE},
+  {"help",          0,      0,      CMD_HELP},
+  {0,               0,      0,      0}
+};
+
+
+/*-------------------------------------------------------------------*/
+/* Mini help  */
+/*-------------------------------------------------------------------*/
+static char helpstr[][100] = {   /* printed line by line for -help; the empty string ends the list */
+" ",
+"Usage: fis [options] <mat-file>",
+" ",
+" Required parameters",
+"  mat-file",
+"     The name of the file storing the transactions. The file is in ",
+"     Cluto's .mat format.",
+" ",
+" Optional parameters",
+"  -minlen=int",
+"     Specifies the minimum length of the patterns. [default: 1]",
+" ",
+"  -maxlen=int",
+"     Specifies the maximum length of the patterns. [default: none]",
+" ",
+"  -minfreq=int",
+"     Specifies the minimum frequency of the patterns. [default: 10]",
+" ",
+"  -maxfreq=int",
+"     Specifies the maximum frequency of the patterns. [default: none]",
+" ",
+"  -silent",
+"     Does not print the discovered itemsets.",
+" ",
+"  -clabels=filename",
+"     Specifies the name of the file that stores the column labels.",
+" ",
+"  -help",
+"     Prints this message.",
+""
+};
+
+static char shorthelpstr[][100] = {   /* short usage printed by parse_cmdline() when the argument count is wrong */
+" ",
+"   Usage: fis [options] <mat-file>",
+"          use 'fis -help' for a summary of the options.",
+""  /* the empty string ends the list */
+};
+ 
+
+
+/*************************************************************************/
+/*! Function prototypes */
+/*************************************************************************/
+void print_init_info(params_t *params, gk_csr_t *mat);
+void print_final_info(params_t *params);
+params_t *parse_cmdline(int argc, char *argv[]);
+void print_an_itemset(void *stateptr, int nitems, int *itemind, 
+                      int ntrans, int *tranind);
+
+
+/*************************************************************************/
+/*! the entry point */
+/**************************************************************************/
+int main(int argc, char *argv[])
+{
+  ssize_t icol;
+  char buf[8192];
+  FILE *fplabels = NULL;
+  params_t *params;
+  gk_csr_t *mat;
+
+  params = parse_cmdline(argc, argv);
+  params->nitemsets = 0;
+
+  /* load the transactions and index them by column */
+  mat = gk_csr_Read(params->filename, GK_CSR_FMT_CLUTO, 1, 1);
+  gk_csr_CreateIndex(mat, GK_CSR_COL);
+
+  /* one label per column: the column number itself, or the matching
+     line of the label file when one was supplied */
+  params->clabels = (char **)gk_malloc(mat->ncols*sizeof(char *), "main: clabels");
+  if (params->clabelfile != NULL)
+    fplabels = gk_fopen(params->clabelfile, "r", "main: fpin");
+
+  for (icol=0; icol<mat->ncols; icol++) {
+    if (params->clabelfile == NULL) {
+      sprintf(buf, "%zd", icol);
+      params->clabels[icol] = gk_strdup(buf);
+      continue;
+    }
+    if (fgets(buf, sizeof(buf), fplabels) == NULL)
+      errexit("Failed on fgets.\n");
+    params->clabels[icol] = gk_strdup(gk_strtprune(buf, " \n\t"));
+  }
+  if (params->clabelfile != NULL)
+    gk_fclose(fplabels);
+
+  print_init_info(params, mat);
+
+  gk_find_frequent_itemsets(mat->nrows, mat->rowptr, mat->rowind,
+      params->minfreq, params->maxfreq, params->minlen, params->maxlen,
+      &print_an_itemset, (void *)params);
+
+  printf("Total itemsets found: %zd\n", params->nitemsets);
+
+  print_final_info(params);
+}
+
+
+
+/*************************************************************************/
+/*! This function prints run parameters */
+/*************************************************************************/
+void print_init_info(params_t *params, gk_csr_t *mat)
+{
+  printf("*******************************************************************************\n");
+  printf(" fis\n\n");
+  printf("Matrix Information ---------------------------------------------------------\n");
+  printf(" input file=%s, [%d, %d, %zd]\n", 
+      params->filename, mat->nrows, mat->ncols, mat->rowptr[mat->nrows]);
+
+  printf("\n");
+  printf("Options --------------------------------------------------------------------\n");
+  /* the labels mirror the option names: -minlen, -maxlen, -minfreq, -maxfreq */
+  printf(" minlen=%zd, maxlen=%zd, minfreq=%zd, maxfreq=%zd\n",
+      params->minlen, params->maxlen, params->minfreq, params->maxfreq);
+  printf("\n");
+  printf("Finding patterns... -----------------------------------------------------\n");
+}
+
+
+/*************************************************************************/
+/*! This function prints final statistics */
+/*************************************************************************/
+void print_final_info(params_t *params)
+{
+  ssize_t maxmem = (ssize_t) gk_GetMaxMemoryUsed(), curmem = (ssize_t) gk_GetCurMemoryUsed();
+  printf("\nMemory Usage Information -----------------------------------------------------\n");
+  printf("   Maximum memory used:              %10zd bytes\n", maxmem);
+  printf("   Current memory used:              %10zd bytes\n", curmem);
+  printf("********************************************************************************\n");
+}
+
+
+/*************************************************************************/
+/*! This is the entry point of the command-line argument parser */
+/*************************************************************************/
+params_t *parse_cmdline(int argc, char *argv[])
+{
+  int i;
+  int c, option_index;
+  params_t *params;
+
+  params = (params_t *)gk_malloc(sizeof(params_t), "parse_cmdline: params");
+
+  /* initialize the params data structure */
+  params->minlen     = 1;
+  params->maxlen     = -1;
+  params->minfreq    = 10;
+  params->maxfreq    = -1;
+  params->silent     = 0;
+  params->filename   = NULL;
+  params->clabelfile = NULL;
+
+  /* Parse the command line arguments; the loop ends when
+     gk_getopt_long_only() returns -1 */
+  while ((c = gk_getopt_long_only(argc, argv, "", long_options, &option_index)) != -1) {
+    switch (c) {
+      case CMD_MINLEN:
+        if (gk_optarg) params->minlen = atoi(gk_optarg);
+        break;
+      case CMD_MAXLEN:
+        if (gk_optarg) params->maxlen = atoi(gk_optarg);
+        break;
+      case CMD_MINFREQ:
+        if (gk_optarg) params->minfreq = atoi(gk_optarg);
+        break;
+      case CMD_MAXFREQ:
+        if (gk_optarg) params->maxfreq = atoi(gk_optarg);
+        break;
+
+      case CMD_SILENT:
+        params->silent = 1;
+        break;
+
+      case CMD_CLABELFILE:
+        if (gk_optarg) params->clabelfile = gk_strdup(gk_optarg);
+        break;
+
+      case CMD_HELP:
+        for (i=0; strlen(helpstr[i]) > 0; i++)
+          printf("%s\n", helpstr[i]);
+        exit(EXIT_SUCCESS);
+        break;
+      case '?':
+      default:
+        printf("Illegal command-line option(s)\nUse %s -help for a summary of the options.\n", argv[0]);
+        exit(EXIT_FAILURE);  /* bad usage is reported as a failure to the caller */
+    }
+  }
+
+  if (argc-gk_optind != 1) {  /* exactly one positional argument: the mat-file */
+    printf("Unrecognized parameters.");
+    for (i=0; strlen(shorthelpstr[i]) > 0; i++)
+      printf("%s\n", shorthelpstr[i]);
+    exit(EXIT_FAILURE);
+  }
+
+  params->filename = gk_strdup(argv[gk_optind++]);
+
+  if (!gk_fexists(params->filename))
+    errexit("input file %s does not exist.\n", params->filename);
+
+  return params;
+}
+
+
+
+/*************************************************************************/
+/*! This is the callback function for the itemset discovery routine */
+/*************************************************************************/
+void print_an_itemset(void *stateptr, int nitems, int *itemids, int ntrans, 
+         int *transids)
+{
+  params_t *state = (params_t *)stateptr;
+  int k;
+
+  /* every invocation counts as one discovered itemset, printed or not */
+  state->nitemsets++;
+  if (state->silent)
+    return;
+
+  printf("%4zd %4d %4d => ", state->nitemsets, nitems, ntrans);
+  for (k=0; k<nitems; k++)
+    printf(" %s", state->clabels[itemids[k]]);
+  printf("\n");
+  for (k=0; k<ntrans; k++)
+    printf(" %d\n", transids[k]);
+  printf("\n");
+}
diff --git a/test/gkgraph.c b/test/gkgraph.c
new file mode 100644
index 0000000..9131464
--- /dev/null
+++ b/test/gkgraph.c
@@ -0,0 +1,845 @@
+/*!
+\file  
+\brief A simple program to try out some graph routines
+
+\date 6/12/2008
+\author George
+\version \verbatim $Id: gkgraph.c 17700 2014-09-27 18:10:02Z karypis $ \endverbatim
+*/
+
+#include <GKlib.h>
+
+
+/*************************************************************************/
+/*! Run-time parameters of the gkgraph test program. The structure is 
+    allocated and filled in by parse_cmdline(). */
+/*************************************************************************/
+typedef struct {
+  int lnbits;     /*!< -lnbits; forwarded to gk_cacheCreate() (help text: number of address bits indexing the cacheline) */
+  int cnbits;     /*!< -cnbits; forwarded to gk_cacheCreate() (help text: number of address bits indexing the cache) */
+  int type;       /*!< -type; in this file it is only echoed by print_init_info() */
+  int niter;      /*!< -niter; number of label-propagation sweeps done by the reorder_*lpn* routines */
+  float eps;      /*!< -eps; in this file it is only echoed by print_init_info() */
+  float lamda;    /*!< -lamda; in this file it is only echoed by print_init_info() */
+  int nosort;     /*!< -nosort; when non-zero sort_adjacencies() returns without sorting */
+  int write;      /*!< -write; when non-zero test_spmv() writes each graph in IJV format */
+
+  char *infile;   /*!< name of the input graph file (first positional argument) */
+  char *outfile;  /*!< name of the output file; set by parse_cmdline() but not read elsewhere in this file */
+} params_t;
+
+/*************************************************************************/
+/*! Constants */
+/*************************************************************************/
+/* Identifiers of the long command-line options; these are the values 
+   stored in the last column of long_options[] and switched on in 
+   parse_cmdline(). */
+#define CMD_NITER       1
+#define CMD_EPS         2
+#define CMD_LAMDA       3
+#define CMD_TYPE        4
+#define CMD_NOSORT      5
+#define CMD_WRITE       6
+#define CMD_LNBITS      7
+#define CMD_CNBITS      8
+#define CMD_HELP        10
+
+/* NOTE(review): the three constants below are not referenced by any code 
+   visible in this file. CLINE32/CLINE64 presumably are the number of 
+   32-bit/64-bit words in a 64-byte cache line -- TODO confirm or remove. */
+#define CLINE32 16
+#define CLINE64 8
+#define MAXRCLOCKSPAN   (1<<20)
+
+/*************************************************************************/
+/*! Local variables */
+/*************************************************************************/
+/* Table of the long options handed to gk_getopt_long_only(). 
+   NOTE(review): the columns presumably follow getopt_long()'s struct 
+   option layout {name, has_arg, flag, val} -- confirm against the 
+   gk_option declaration. The second column is 1 for the options that 
+   the help text shows with a value (-niter=int, ...) and 0 for the plain 
+   switches; the last column is the CMD_* id handled in parse_cmdline(). 
+   The all-zero row terminates the table. */
+static struct gk_option long_options[] = {
+  {"lnbits",     1,      0,      CMD_LNBITS},
+  {"cnbits",     1,      0,      CMD_CNBITS},
+  {"type",       1,      0,      CMD_TYPE},
+  {"niter",      1,      0,      CMD_NITER},
+  {"lamda",      1,      0,      CMD_LAMDA},
+  {"eps",        1,      0,      CMD_EPS},
+  {"nosort",     0,      0,      CMD_NOSORT},
+  {"write",      0,      0,      CMD_WRITE},
+  {"help",       0,      0,      CMD_HELP},
+  {0,            0,      0,      0}
+};
+
+
+/*-------------------------------------------------------------------*/
+/* Mini help  */
+/*-------------------------------------------------------------------*/
+/* Full help text printed by the -help option, one output line per row.
+   The empty string in the last row is the terminator that the printing 
+   loop in parse_cmdline() looks for. Every row must fit in 100 chars
+   (including the terminating NUL). The defaults quoted here are kept in 
+   sync with the values that parse_cmdline() assigns before parsing. */
+static char helpstr[][100] = {
+" ",
+"Usage: gkgraph [options] <graph-file> [<out-file>]",
+" ",
+" Required parameters",
+"  graph-file",
+"     The name of the file storing the graph. The file is in ",
+"     Metis' graph format.",
+" ",
+" Optional parameters",
+"  -niter=int",
+"     Specifies the maximum number of iterations. [default: 1]",
+" ",
+"  -lnbits=int",
+"     Specifies the number of address bits indexing the cacheline. [default: 6]",
+" ",
+"  -cnbits=int",
+"     Specifies the number of address bits indexing the cache. [default: 13]",
+" ",
+"  -lamda=float",
+"     Specifies the follow-the-adjacent-links probability. [default: 0.20]",
+" ",
+"  -eps=float",
+"     Specifies the error tolerance. [default: 1e-10]",
+" ",
+"  -nosort",
+"     Does not sort the adjacency lists.",
+" ",
+"  -write",
+"     Output the reordered graphs.",
+" ",
+"  -help",
+"     Prints this message.",
+""
+};
+
+/* Short usage message printed by parse_cmdline() when the number of 
+   positional arguments is wrong. As with helpstr, the empty last row 
+   terminates the list. */
+static char shorthelpstr[][100] = {
+" ",
+"   Usage: gkgraph [options] <graph-file> [<out-file>]",
+"          use 'gkgraph -help' for a summary of the options.",
+""
+};
+ 
+
+
+/*************************************************************************/
+/*! Function prototypes */
+/*************************************************************************/
+/* test drivers */
+void test_spmv(params_t *params);
+void test_tc(params_t *params);
+/* adjacency-list sorting and cache-simulation kernels */
+void sort_adjacencies(params_t *params, gk_graph_t *graph);
+double compute_spmvstats(params_t *params, gk_graph_t *graph);
+double compute_tcstats(params_t *params, gk_graph_t *graph, int32_t *iperm);
+/* vertex re-ordering routines; each returns a newly allocated array of 
+   graph->nvtxs entries that the caller releases with gk_free() */
+int32_t *reorder_degrees(params_t *params, gk_graph_t *graph);
+int32_t *reorder_freqlpn(params_t *params, gk_graph_t *graph);
+int32_t *reorder_freqlpn_db(params_t *params, gk_graph_t *graph);
+int32_t *reorder_minlpn(params_t *params, gk_graph_t *graph);
+int32_t *reorder_minlpn_db(params_t *params, gk_graph_t *graph);
+/* reporting and command-line handling */
+void print_init_info(params_t *params, gk_graph_t *graph);
+void print_final_info(params_t *params);
+params_t *parse_cmdline(int argc, char *argv[]);
+
+
+/*************************************************************************/
+/*! The entry point. Parses the command line and runs the 
+    triangle-counting tests (test_spmv() is not invoked from here).
+
+    \returns 0 on completion; parse_cmdline() terminates the process 
+             itself when the command line is not usable.
+*/
+/**************************************************************************/
+int main(int argc, char *argv[])
+{
+  params_t *params;
+ 
+  /* get command-line options */
+  params = parse_cmdline(argc, argv);
+
+  test_tc(params);
+
+  /* release the two file names duplicated by parse_cmdline() and then the 
+     structure itself; the structure is listed last so that the addresses 
+     of its members stay valid while they are being freed */
+  gk_free((void **)&params->infile, &params->outfile, &params, LTERM);
+
+  return 0;
+}
+
+
+/*************************************************************************/
+/*! Helper of test_spmv(). Reorders graph according to perm, sorts the 
+    adjacency lists of the result, optionally writes it to ofname (when 
+    params->write is set), prints the simulated SPMV hit rate prefixed by 
+    label, and finally releases both the reordered graph and perm.
+
+    \param params  the run parameters.
+    \param graph   the graph to reorder; not released here.
+    \param perm    the permutation handed to gk_graph_Reorder(); it is 
+                   freed before returning.
+    \param ofname  file name used when the reordered graph is written.
+    \param label   name of the ordering; printed left-justified in a 
+                   16-character column.
+*/
+/**************************************************************************/
+static void spmv_report_ordering(params_t *params, gk_graph_t *graph, 
+         int32_t *perm, char *ofname, char *label)
+{
+  gk_graph_t *pgraph;
+
+  pgraph = gk_graph_Reorder(graph, perm, NULL);
+  sort_adjacencies(params, pgraph);
+  if (params->write) gk_graph_Write(pgraph, ofname, GK_GRAPH_FMT_IJV, 1);
+  printf("%-16s SPMV HitRate: %.4lf\n", label, compute_spmvstats(params, pgraph));
+
+  gk_graph_Free(&pgraph);
+  gk_free((void **)&perm, LTERM);
+}
+
+
+/*************************************************************************/
+/*! Various spmv-related tests. Reads the graph named by params->infile 
+    and reports the simulated SPMV cache hit rate of the input ordering 
+    and of the BFS, degree, and label-propagation based re-orderings.
+*/
+/**************************************************************************/
+void test_spmv(params_t *params)
+{
+  ssize_t v;
+  gk_graph_t *graph;
+  int32_t *perm;
+ 
+  /* read the data */
+  graph = gk_graph_Read(params->infile, GK_GRAPH_FMT_METIS, -1, -1, 0, 0, 0);
+
+  /* display some basic stats */
+  print_init_info(params, graph);
+
+  /* baseline: the ordering of the input file */
+  sort_adjacencies(params, graph);
+  if (params->write) gk_graph_Write(graph, "original.ijv", GK_GRAPH_FMT_IJV, 1);
+  printf("Input            SPMV HitRate: %.4lf\n", compute_spmvstats(params, graph));
+
+  /* BFS ordering rooted at a randomly selected vertex */
+  v = RandomInRange(graph->nvtxs);
+  gk_graph_ComputeBFSOrdering(graph, v, &perm, NULL);
+  spmv_report_ordering(params, graph, perm, "bfs.ijv", "BFS");
+
+  /* the remaining orderings; each reorder_*() result is consumed and 
+     freed by the helper */
+  perm = reorder_degrees(params, graph);
+  spmv_report_ordering(params, graph, perm, "degrees.ijv", "Degrees");
+
+  perm = reorder_freqlpn(params, graph);
+  spmv_report_ordering(params, graph, perm, "freqlpn.ijv", "FreqLabelPropN");
+
+  perm = reorder_freqlpn_db(params, graph);
+  spmv_report_ordering(params, graph, perm, "freqlpn-db.ijv", "DBFreqLabelPropN");
+
+  perm = reorder_minlpn(params, graph);
+  spmv_report_ordering(params, graph, perm, "minlpn.ijv", "MinLabelPropN");
+
+  perm = reorder_minlpn_db(params, graph);
+  spmv_report_ordering(params, graph, perm, "minlpn-db.ijv", "DBMinLabelPropN");
+
+  gk_graph_Free(&graph);
+
+  print_final_info(params);
+
+  return;
+}
+
+
+/*************************************************************************/
+/*! Various tc-related tests. Reads the graph named by params->infile, 
+    reorders it once by vertex degree, and then reports the simulated 
+    cache hit rate of compute_tcstats() when the vertices of that 
+    reordered graph are visited in different orders (identity, BFS, and 
+    the two frequency-based label-propagation orderings).
+*/
+/**************************************************************************/
+void test_tc(params_t *params)
+{
+  ssize_t i, v;
+  gk_graph_t *graph, *pgraph;
+  int32_t *perm, *iperm;
+ 
+  /* read the data */
+  graph = gk_graph_Read(params->infile, GK_GRAPH_FMT_METIS, -1, -1, 0, 0, 0);
+
+  /* display some basic stats */
+  print_init_info(params, graph);
+
+  /* degree-reordered graph visited in its natural (identity) order */
+  perm = reorder_degrees(params, graph);
+  pgraph = gk_graph_Reorder(graph, perm, NULL);
+  gk_free((void **)&perm, LTERM);
+  sort_adjacencies(params, pgraph);
+  iperm = gk_i32incset(graph->nvtxs, 0, gk_i32malloc(graph->nvtxs, "iperm"));
+  printf("Degrees          TC HitRate: %.4lf\n", compute_tcstats(params, pgraph, iperm));
+
+
+  /* compute_tcstats() reverses part of every adjacency list, so the lists 
+     are re-sorted before each subsequent experiment; iperm is rebuilt as 
+     the inverse of the freshly computed perm */
+  sort_adjacencies(params, pgraph);
+  v = RandomInRange(pgraph->nvtxs);
+  gk_graph_ComputeBFSOrdering(pgraph, v, &perm, NULL);
+  for (i=0; i<graph->nvtxs; i++) iperm[perm[i]] = i;
+  gk_free((void **)&perm, LTERM);
+  printf("BFS              TC HitRate: %.4lf\n", compute_tcstats(params, pgraph, iperm));
+
+
+  sort_adjacencies(params, pgraph);
+  perm = reorder_freqlpn(params, pgraph);
+  for (i=0; i<graph->nvtxs; i++) iperm[perm[i]] = i;
+  gk_free((void **)&perm, LTERM);
+  printf("FreqLabelPropN   TC HitRate: %.4lf\n", compute_tcstats(params, pgraph, iperm));
+
+  sort_adjacencies(params, pgraph);
+  perm = reorder_freqlpn_db(params, pgraph);
+  for (i=0; i<graph->nvtxs; i++) iperm[perm[i]] = i;
+  gk_free((void **)&perm, LTERM);
+  printf("DBFreqLabelPropN TC HitRate: %.4lf\n", compute_tcstats(params, pgraph, iperm));
+
+  /* the degree-reordered graph is no longer needed; release it here so 
+     that it is neither leaked nor counted in the final memory report */
+  gk_graph_Free(&pgraph);
+
+
+#ifdef XXX
+  perm = reorder_minlpn(params, graph);
+  pgraph = gk_graph_Reorder(graph, perm, NULL);
+  sort_adjacencies(params, pgraph);
+  if (params->write) gk_graph_Write(pgraph, "minlpn.ijv", GK_GRAPH_FMT_IJV, 1);
+  printf("MinLabelPropN    SPMV HitRate: %.4lf\n", compute_spmvstats(params, pgraph));
+  gk_graph_Free(&pgraph);
+  gk_free((void **)&perm, LTERM);
+
+  perm = reorder_minlpn_db(params, graph);
+  pgraph = gk_graph_Reorder(graph, perm, NULL);
+  sort_adjacencies(params, pgraph);
+  if (params->write) gk_graph_Write(pgraph, "minlpn-db.ijv", GK_GRAPH_FMT_IJV, 1);
+  printf("DBMinLabelPropN  SPMV HitRate: %.4lf\n", compute_spmvstats(params, pgraph));
+  gk_graph_Free(&pgraph);
+  gk_free((void **)&perm, LTERM);
+#endif
+
+  gk_free((void **)&iperm, LTERM);
+  gk_graph_Free(&graph);
+
+  print_final_info(params);
+
+  return;
+}
+
+
+/*************************************************************************/
+/*! Sorts the adjacency list of every vertex of the graph in increasing
+    order, in place. Nothing is done when params->nosort is set.
+
+    \param params  the run parameters; only the nosort flag is consulted.
+    \param graph   the graph whose adjncy array gets sorted row by row.
+*/
+/*************************************************************************/
+void sort_adjacencies(params_t *params, gk_graph_t *graph)
+{
+  uint64_t v, nv;
+  ssize_t *rowptr; 
+  int32_t *nbrs;
+
+  if (!params->nosort) {
+    nv     = graph->nvtxs;
+    rowptr = graph->xadj;
+    nbrs   = graph->adjncy;
+
+    /* the list of vertex v occupies nbrs[rowptr[v] .. rowptr[v+1]-1] */
+    for (v=0; v<nv; v++) {
+      ssize_t start = rowptr[v];
+      gk_i32sorti(rowptr[v+1]-start, nbrs+start);
+    }
+  }
+
+  return;
+}
+
+
+/*************************************************************************/
+/*! Replays the memory accesses of an SPMV-like sweep over the graph 
+    through GKlib's cache simulator and returns the resulting hit rate.
+    For every stored adjacency entry the sweep touches the entry itself 
+    and the element of a scratch vector that the entry points to.
+
+    \param params  supplies lnbits and cnbits for the simulated cache.
+    \param graph   the graph whose xadj/adjncy arrays are traversed.
+    \returns the value reported by gk_cacheGetHitRate().
+ */
+/*************************************************************************/
+double compute_spmvstats(params_t *params, gk_graph_t *graph)
+{
+  uint64_t e, nv;
+  ssize_t *rowptr, nentries; 
+  int32_t *nbrs, *x;
+  double rate;
+  gk_cache_t *sim;
+
+  sim = gk_cacheCreate(16, params->lnbits, params->cnbits); /* 8MB total; i7 spec */
+
+  nv     = graph->nvtxs;
+  rowptr = graph->xadj;
+  nbrs   = graph->adjncy;
+
+  /* only the addresses of x[] are used; its contents are never read */
+  x = gk_i32malloc(nv, "vec");
+
+  nentries = rowptr[nv];
+  for (e=0; e<nentries; e++) {
+    gk_cacheLoad(sim, (size_t)(&nbrs[e]));
+    gk_cacheLoad(sim, (size_t)(&x[nbrs[e]]));
+  }
+
+  gk_free((void **)&x, LTERM);
+
+  rate = gk_cacheGetHitRate(sim);
+  gk_cacheDestroy(&sim);
+
+  return rate;
+}
+
+
+/*************************************************************************/
+/*! The hash-map-based triangle-counting routine that uses the JIK
+    triangle enumeration scheme. Every array access that the counting 
+    performs is also replayed through GKlib's cache simulator, and the 
+    simulated hit rate is what gets returned; the triangle count itself 
+    is only printed.
+
+    This version implements the following:
+      - It does not store location information in L
+      - Reverts the order within U's adjancency lists to allow ++ traversal
+
+    Notes:
+      - graph->adjncy is modified: for every vertex, the entries that are 
+        not smaller than the vertex itself end up in reversed order.
+      - The intersection loop has no explicit end-of-list test; it stops 
+        at the first entry that is <= vj. NOTE(review): this presumably 
+        relies on the graph being symmetric and the lists being sorted on 
+        entry -- confirm with the callers.
+      - The loop conditions rely on gk_cacheLoad() returning non-zero.
+        NOTE(review): confirm against its definition.
+
+    \param params  supplies lnbits and cnbits for the simulated cache.
+    \param graph   the graph to process.
+    \param iperm   the order in which the vertices are visited: the 
+                   vjj-th vertex processed is iperm[vjj].
+    \returns the value reported by gk_cacheGetHitRate().
+*/
+/*************************************************************************/
+double compute_tcstats(params_t *params, gk_graph_t *graph, int32_t *iperm)
+{
+  int32_t vi, vj, vjj, vk, nvtxs;
+  ssize_t ei, eiend, ej, ejend;
+  int64_t ntriangles;
+  ssize_t *xadj, *uxadj;
+  int32_t *adjncy;
+  int32_t l, hmsize, *hmap;
+  
+  gk_cache_t *cache = gk_cacheCreate(16, params->lnbits, params->cnbits); 
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+
+  /* determine the starting location of the upper trianglular part */
+  uxadj = gk_zmalloc(nvtxs, "uxadj");
+  for (vi=0; vi<nvtxs; vi++) {
+    for (ei=xadj[vi], eiend=xadj[vi+1]; ei<eiend && adjncy[ei]<vi; ei++); 
+    uxadj[vi] = ei;
+    /* flip the order of Adj(vi)'s upper triangular adjacency list */
+    for (ej=xadj[vi+1]-1; ei<ej; ei++, ej--) {
+      vj = adjncy[ei];
+      adjncy[ei] = adjncy[ej];
+      adjncy[ej] = vj;
+    }
+  }
+
+  /* determine the size of the hash-map and convert it into a format
+     that is compatible with a bitwise AND operation */
+  for (hmsize=0, vi=0; vi<nvtxs; vi++) 
+    hmsize = gk_max(hmsize, (int32_t)(xadj[vi+1]-uxadj[vi]));
+  for (l=1; hmsize>(1<<l); l++);
+  hmsize = (1<<(l+4))-1;
+  hmap = gk_i32smalloc(hmsize+1, 0, "hmap");
+
+  for (ntriangles=0, vjj=0; vjj<nvtxs; vjj++) {
+    vj = iperm[vjj];
+
+    gk_cacheLoad(cache, (size_t)(&xadj[vj]));
+    gk_cacheLoad(cache, (size_t)(&xadj[vj+1]));
+    gk_cacheLoad(cache, (size_t)(&uxadj[vj]));
+
+    /* nothing to intersect if either part of Adj(vj) is empty */
+    if (xadj[vj+1]-uxadj[vj] == 0 || uxadj[vj] == xadj[vj])
+      continue;
+
+    /* hash Adj(vj); a zero slot marks an empty entry (open addressing 
+       with linear probing) */
+    gk_cacheLoad(cache, (size_t)(&uxadj[vj]));
+    gk_cacheLoad(cache, (size_t)(&xadj[vj+1]));
+    for (ej=uxadj[vj], ejend=xadj[vj+1]; ej<ejend; ej++) {
+      gk_cacheLoad(cache, (size_t)(&adjncy[ej]));
+      vk = adjncy[ej];
+      for (l=(vk&hmsize); 
+           gk_cacheLoad(cache, (size_t)(&hmap[l])) && hmap[l]!=0; 
+           l=((l+1)&hmsize));
+      hmap[l] = vk;
+    }
+
+    /* find intersections */
+    gk_cacheLoad(cache, (size_t)(&xadj[vj]));
+    gk_cacheLoad(cache, (size_t)(&uxadj[vj]));
+    for (ej=xadj[vj], ejend=uxadj[vj]; ej<ejend; ej++) {
+      gk_cacheLoad(cache, (size_t)(&adjncy[ej]));
+      vi = adjncy[ej];
+      /* the simulated load must follow the assignment of vi so that it 
+         refers to the uxadj entry that is actually read below */
+      gk_cacheLoad(cache, (size_t)(&uxadj[vi]));
+      for (ei=uxadj[vi]; gk_cacheLoad(cache, (size_t)(&adjncy[ei])) && adjncy[ei]>vj; ei++) {
+        vk = adjncy[ei];
+        for (l=vk&hmsize; 
+             gk_cacheLoad(cache, (size_t)(&hmap[l])) && hmap[l]!=0 && hmap[l]!=vk; 
+             l=((l+1)&hmsize));
+        gk_cacheLoad(cache, (size_t)(&hmap[l]));
+        if (hmap[l] == vk) 
+          ntriangles++;
+      }
+    }
+
+    /* reset hash */
+    gk_cacheLoad(cache, (size_t)(&uxadj[vj]));
+    gk_cacheLoad(cache, (size_t)(&xadj[vj+1]));
+    for (ej=uxadj[vj], ejend=xadj[vj+1]; ej<ejend; ej++) {
+      gk_cacheLoad(cache, (size_t)(&adjncy[ej]));
+      vk = adjncy[ej];
+      for (l=(vk&hmsize); 
+           gk_cacheLoad(cache, (size_t)(&hmap[l])) && hmap[l]!=vk; 
+           l=((l+1)&hmsize));
+      hmap[l] = 0;
+    }
+  }
+  /* ntriangles is a signed 64-bit counter, hence PRId64 */
+  printf("& compatible hmsize: %"PRId32" #triangles: %"PRId64"\n", hmsize, ntriangles);
+
+  gk_free((void **)&uxadj, &hmap, LTERM);
+
+  //printf("%zd %zd\n", (ssize_t)cache->nhits, (ssize_t)cache->clock);
+
+  double hitrate = gk_cacheGetHitRate(cache);
+  gk_cacheDestroy(&cache);
+
+  return hitrate;
+}
+
+
+/*************************************************************************/
+/*! Computes an increasing-degree ordering of the vertices by means of a 
+    counting sort on the vertex degrees.
+
+    \param params  unused.
+    \param graph   the graph; only nvtxs and xadj are read.
+    \returns a newly allocated array with one entry per vertex holding 
+             the position assigned to that vertex; the caller frees it.
+*/
+/*************************************************************************/
+int32_t *reorder_degrees(params_t *params, gk_graph_t *graph)
+{
+  int v, nv, ndegs;
+  ssize_t *rowptr; 
+  int32_t *bins, *order;
+
+  nv     = graph->nvtxs;
+  rowptr = graph->xadj;
+
+  /* ndegs becomes one more than the largest degree */
+  ndegs = 0;
+  for (v=0; v<nv; v++) 
+    ndegs = gk_max(ndegs, rowptr[v+1]-rowptr[v]);
+  ndegs++;
+
+  /* histogram of the degrees, then MAKECSR to obtain, for every degree, 
+     the first position handed out to a vertex of that degree */
+  bins = gk_i32smalloc(ndegs+1, 0, "counts");
+  for (v=0; v<nv; v++) {
+    bins[rowptr[v+1]-rowptr[v]]++;
+  }
+  MAKECSR(v, ndegs, bins);
+
+  /* hand out consecutive positions within each degree class */
+  order = gk_i32malloc(nv, "perm");
+  for (v=0; v<nv; v++) {
+    order[v] = bins[rowptr[v+1]-rowptr[v]]++;
+  }
+
+  gk_free((void **)&bins, LTERM);
+
+  return order;
+}
+
+
+/*************************************************************************/
+/*! This function re-orders the graph by:
+    - performing a fixed number of most-popular label propagation iterations
+    - locally renumbers the vertices with the same label
+
+    Every vertex starts with its own index as label. In each of the 
+    params->niter sweeps the vertices are visited in a shuffled order and 
+    each one adopts the label that occurs most often among its neighbors, 
+    with ties broken by RandomInRange(2). Vertices without neighbors keep 
+    their current label.
+
+    \param params  supplies niter.
+    \param graph   the graph; nvtxs, xadj, and adjncy are read.
+    \returns a newly allocated array with one entry per vertex holding 
+             the rank of that vertex after sorting by final label; the 
+             caller frees it.
+*/
+/*************************************************************************/
+int32_t *reorder_freqlpn(params_t *params, gk_graph_t *graph)
+{
+  int32_t i, ii, k, nvtxs, maxlbl;
+  ssize_t j, *xadj; 
+  int32_t *adjncy, *labels, *freq, *perm;
+  gk_i32kv_t *cand;
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+
+  labels = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "labels"));
+  freq   = gk_i32smalloc(nvtxs, 0, "freq");
+  perm   = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "perm"));
+
+  for (k=0; k<params->niter; k++) {
+    /* perm doubles as the visiting order of this sweep */
+    gk_i32randArrayPermuteFine(nvtxs, perm, 0);
+    for (ii=0; ii<nvtxs; ii++) {
+      i = perm[ii];
+
+      /* a vertex without neighbors has nothing to adopt; seeding maxlbl 
+         from adjncy[xadj[i]] would read an entry that belongs to another 
+         vertex (or lies past the end of adjncy) and would leave a stale 
+         count in freq[] */
+      if (xadj[i] == xadj[i+1])
+        continue;
+
+      /* seed with the label of the first neighbor ... */
+      maxlbl = labels[adjncy[xadj[i]]];
+      freq[maxlbl] = 1;
+      /* ... and tally the remaining ones */
+      for (j=xadj[i]+1; j<xadj[i+1]; j++) {
+        freq[labels[adjncy[j]]]++;
+        if (freq[maxlbl] < freq[labels[adjncy[j]]])
+          maxlbl = labels[adjncy[j]];
+        else if (freq[maxlbl] == freq[labels[adjncy[j]]]) {
+          if (RandomInRange(2))
+            maxlbl = labels[adjncy[j]];
+        }
+      }
+      /* restore freq[] to all zeros for the next vertex */
+      for (j=xadj[i]; j<xadj[i+1]; j++) 
+        freq[labels[adjncy[j]]] = 0;
+      labels[i] = maxlbl;
+    }
+  }
+
+  /* sort the vertices by label; the rank in the sorted sequence becomes 
+     the new position of the vertex */
+  cand = gk_i32kvmalloc(nvtxs, "cand");
+  for (i=0; i<nvtxs; i++) {
+    cand[i].key = labels[i];
+    cand[i].val = i;
+  }
+  gk_i32kvsorti(nvtxs, cand);
+
+  for (i=0; i<nvtxs; i++)
+    perm[cand[i].val] = i;
+
+  gk_free((void **)&labels, &freq, &cand, LTERM);
+
+  return perm;
+}
+
+
+/*************************************************************************/
+/*! Degree-bucketed variant of the most-popular label propagation 
+    re-ordering:
+    - runs params->niter sweeps of most-popular label propagation
+    - a neighbor contributes only if it falls in the same degree bucket 
+      (degree divided by 8, rounded down) as the vertex being updated
+    - the vertices are finally ranked by their label
+
+    \param params  supplies niter.
+    \param graph   the graph; nvtxs, xadj, and adjncy are read.
+    \returns a newly allocated array with one entry per vertex holding 
+             the rank of that vertex after sorting by final label; the 
+             caller frees it.
+*/
+/*************************************************************************/
+int32_t *reorder_freqlpn_db(params_t *params, gk_graph_t *graph)
+{
+  int32_t v, pos, iter, nv, best, nbr, lbl;
+  ssize_t e, *rowptr; 
+  int32_t *nbrs, *labels, *tally, *perm, *bucket;
+  gk_i32kv_t *pairs;
+
+  nv     = graph->nvtxs;
+  rowptr = graph->xadj;
+  nbrs   = graph->adjncy;
+
+  labels = gk_i32incset(nv, 0, gk_i32malloc(nv, "labels"));
+  tally  = gk_i32smalloc(nv, 0, "freq");
+  perm   = gk_i32incset(nv, 0, gk_i32malloc(nv, "perm"));
+  bucket = gk_i32malloc(nv, "dbucket");
+
+  for (v=0; v<nv; v++) {
+    bucket[v] = ((rowptr[v+1]-rowptr[v])>>3);
+  }
+
+  for (iter=0; iter<params->niter; iter++) {
+    /* perm doubles as the visiting order of this sweep */
+    gk_i32randArrayPermuteFine(nv, perm, 0);
+    for (pos=0; pos<nv; pos++) {
+      v = perm[pos];
+      best = labels[v];
+
+      for (e=rowptr[v]; e<rowptr[v+1]; e++) {
+        nbr = nbrs[e];
+        if (bucket[v] == bucket[nbr]) {
+          lbl = labels[nbr];
+          tally[lbl]++;
+          if (tally[best] < tally[lbl])
+            best = lbl;
+          else if (tally[best] == tally[lbl] && RandomInRange(2))
+            best = lbl;
+        }
+      }
+
+      /* bring the tallies back to zero for the next vertex */
+      for (e=rowptr[v]; e<rowptr[v+1]; e++) {
+        tally[labels[nbrs[e]]] = 0;
+      }
+      labels[v] = best;
+    }
+  }
+
+  /* rank the vertices by their final label */
+  pairs = gk_i32kvmalloc(nv, "cand");
+  for (v=0; v<nv; v++) {
+    pairs[v].key = labels[v];
+    pairs[v].val = v;
+  }
+  gk_i32kvsorti(nv, pairs);
+
+  for (v=0; v<nv; v++) {
+    perm[pairs[v].val] = v;
+  }
+
+  gk_free((void **)&labels, &tally, &bucket, &pairs, LTERM);
+
+  return perm;
+}
+
+
+/*************************************************************************/
+/*! Min-label propagation re-ordering:
+    - runs params->niter sweeps in which every vertex, in index order, 
+      takes the smallest label found among itself and its neighbors
+    - the vertices are finally ranked by their label
+
+    \param params  supplies niter.
+    \param graph   the graph; nvtxs, xadj, and adjncy are read.
+    \returns a newly allocated array with one entry per vertex holding 
+             the rank of that vertex after sorting by final label; the 
+             caller frees it.
+*/
+/*************************************************************************/
+int32_t *reorder_minlpn(params_t *params, gk_graph_t *graph)
+{
+  int32_t v, iter, nv, smallest;
+  ssize_t e, *rowptr; 
+  int32_t *nbrs, *labels, *perm;
+  gk_i32kv_t *pairs;
+
+  nv     = graph->nvtxs;
+  rowptr = graph->xadj;
+  nbrs   = graph->adjncy;
+
+  /* every vertex starts out labelled with its own index */
+  labels = gk_i32incset(nv, 0, gk_i32malloc(nv, "labels"));
+  perm   = gk_i32incset(nv, 0, gk_i32malloc(nv, "perm"));
+
+  for (iter=0; iter<params->niter; iter++) {
+    for (v=0; v<nv; v++) {
+      smallest = labels[v];
+      for (e=rowptr[v]; e<rowptr[v+1]; e++) {
+        if (labels[nbrs[e]] < smallest)
+          smallest = labels[nbrs[e]];
+      }
+      labels[v] = smallest;
+    }
+  }
+
+  /* rank the vertices by their final label */
+  pairs = gk_i32kvmalloc(nv, "cand");
+  for (v=0; v<nv; v++) {
+    pairs[v].key = labels[v];
+    pairs[v].val = v;
+  }
+  gk_i32kvsorti(nv, pairs);
+
+  for (v=0; v<nv; v++) {
+    perm[pairs[v].val] = v;
+  }
+
+  gk_free((void **)&labels, &pairs, LTERM);
+
+  return perm;
+}
+
+
+/*************************************************************************/
+/*! Degree-bucketed variant of the min-label propagation re-ordering:
+    - runs params->niter sweeps in which every vertex, in index order, 
+      takes the smallest label found among itself and those neighbors 
+      that fall in the same degree bucket (degree divided by 8, rounded 
+      down)
+    - the vertices are finally ranked by their label
+
+    \param params  supplies niter.
+    \param graph   the graph; nvtxs, xadj, and adjncy are read.
+    \returns a newly allocated array with one entry per vertex holding 
+             the rank of that vertex after sorting by final label; the 
+             caller frees it.
+*/
+/*************************************************************************/
+int32_t *reorder_minlpn_db(params_t *params, gk_graph_t *graph)
+{
+  int32_t v, iter, nv, smallest, nbr;
+  ssize_t e, *rowptr; 
+  int32_t *nbrs, *labels, *perm, *bucket;
+  gk_i32kv_t *pairs;
+
+  nv     = graph->nvtxs;
+  rowptr = graph->xadj;
+  nbrs   = graph->adjncy;
+
+  /* every vertex starts out labelled with its own index */
+  labels = gk_i32incset(nv, 0, gk_i32malloc(nv, "labels"));
+  perm   = gk_i32incset(nv, 0, gk_i32malloc(nv, "perm"));
+  bucket = gk_i32malloc(nv, "dbucket");
+
+  for (v=0; v<nv; v++) {
+    bucket[v] = ((rowptr[v+1]-rowptr[v])>>3);
+  }
+
+  for (iter=0; iter<params->niter; iter++) {
+    for (v=0; v<nv; v++) {
+      smallest = labels[v];
+      for (e=rowptr[v]; e<rowptr[v+1]; e++) {
+        nbr = nbrs[e];
+        if (bucket[v] == bucket[nbr] && labels[nbr] < smallest)
+          smallest = labels[nbr];
+      }
+      labels[v] = smallest;
+    }
+  }
+
+  /* rank the vertices by their final label */
+  pairs = gk_i32kvmalloc(nv, "cand");
+  for (v=0; v<nv; v++) {
+    pairs[v].key = labels[v];
+    pairs[v].val = v;
+  }
+  gk_i32kvsorti(nv, pairs);
+
+  for (v=0; v<nv; v++) {
+    perm[pairs[v].val] = v;
+  }
+
+  gk_free((void **)&labels, &bucket, &pairs, LTERM);
+
+  return perm;
+}
+
+
+/*************************************************************************/
+/*! Prints the banner that opens a run: the input file with the number 
+    of vertices and the value of xadj[nvtxs], followed by the option 
+    values in effect.
+
+    \param params  the run parameters to report.
+    \param graph   the graph that was read.
+*/
+/*************************************************************************/
+void print_init_info(params_t *params, gk_graph_t *graph)
+{
+  char *fname = params->infile;
+
+  printf("*******************************************************************************\n");
+  printf(" gkgraph\n\n");
+
+  /* the graph */
+  printf("Graph Information ----------------------------------------------------------\n");
+  printf(" input file=%s, [%d, %zd]\n", fname, graph->nvtxs, 
+      graph->xadj[graph->nvtxs]);
+  printf("\n");
+
+  /* the options */
+  printf("Options --------------------------------------------------------------------\n");
+  printf(" lnbits=%d, cnbits=%d, type=%d, niter=%d, lamda=%f, eps=%e\n",
+      params->lnbits, 
+      params->cnbits, 
+      params->type, 
+      params->niter, 
+      params->lamda, 
+      params->eps);
+  printf("\n");
+
+  printf("Working... -----------------------------------------------------------------\n");
+}
+
+
+/*************************************************************************/
+/*! Prints the closing banner of a run with the values reported by 
+    gk_GetMaxMemoryUsed() and gk_GetCurMemoryUsed().
+
+    \param params  unused.
+*/
+/*************************************************************************/
+void print_final_info(params_t *params)
+{
+  ssize_t nbytes;
+
+  printf("\n");
+  printf("Memory Usage Information -----------------------------------------------------\n");
+
+  nbytes = (ssize_t) gk_GetMaxMemoryUsed();
+  printf("   Maximum memory used:              %10zd bytes\n", nbytes);
+
+  nbytes = (ssize_t) gk_GetCurMemoryUsed();
+  printf("   Current memory used:              %10zd bytes\n", nbytes);
+
+  printf("********************************************************************************\n");
+}
+
+
+/*************************************************************************/
+/*! This is the entry point of the command-line argument parser.
+
+    Usage: gkgraph [options] <graph-file> [<out-file>]
+
+    The options are first set to their defaults and then overridden by 
+    whatever is present on the command line. Exactly one or two 
+    positional arguments must remain after the options: the input graph 
+    file and, optionally, the output file (which defaults to 
+    "gkgraph.out").
+
+    The process is terminated from within this function when -help is 
+    given, when an illegal option is seen, when the number of positional 
+    arguments is wrong, or when the input file does not exist.
+
+    \param argc  the argument count received by main().
+    \param argv  the argument vector received by main().
+    \returns a newly allocated params_t; infile and outfile are 
+             gk_strdup()'d copies owned by the structure.
+*/
+/*************************************************************************/
+params_t *parse_cmdline(int argc, char *argv[])
+{
+  int i, nargs;
+  int c, option_index;
+  params_t *params;
+
+  params = (params_t *)gk_malloc(sizeof(params_t), "parse_cmdline: params");
+
+  /* initialize the params data structure */
+  params->lnbits    = 6;
+  params->cnbits    = 13;
+  params->type      = 1;
+  params->niter     = 1;
+  params->eps       = 1e-10;
+  params->lamda     = 0.20;
+  params->nosort    = 0;
+  params->write     = 0;
+  params->infile    = NULL;
+  params->outfile   = NULL;
+
+
+  /* Parse the command line arguments  */
+  while ((c = gk_getopt_long_only(argc, argv, "", long_options, &option_index)) != -1) {
+    switch (c) {
+      case CMD_LNBITS:
+        if (gk_optarg) params->lnbits = atoi(gk_optarg);
+        break;
+      case CMD_CNBITS:
+        if (gk_optarg) params->cnbits = atoi(gk_optarg);
+        break;
+      case CMD_TYPE:
+        if (gk_optarg) params->type = atoi(gk_optarg);
+        break;
+      case CMD_NITER:
+        if (gk_optarg) params->niter = atoi(gk_optarg);
+        break;
+      case CMD_EPS:
+        if (gk_optarg) params->eps = atof(gk_optarg);
+        break;
+      case CMD_LAMDA:
+        if (gk_optarg) params->lamda = atof(gk_optarg);
+        break;
+      case CMD_NOSORT:
+        params->nosort = 1;
+        break;
+      case CMD_WRITE:
+        params->write = 1;
+        break;
+
+      case CMD_HELP:
+        for (i=0; strlen(helpstr[i]) > 0; i++)
+          printf("%s\n", helpstr[i]);
+        exit(0);
+        break;
+      case '?':
+      default:
+        printf("Illegal command-line option(s)\nUse %s -help for a summary of the options.\n", argv[0]);
+        exit(0);
+    }
+  }
+
+  /* <graph-file> is mandatory and <out-file> is optional, so one or two 
+     positional arguments are acceptable */
+  nargs = argc-gk_optind;
+  if (nargs < 1 || nargs > 2) {
+    printf("Unrecognized parameters.");
+    for (i=0; strlen(shorthelpstr[i]) > 0; i++)
+      printf("%s\n", shorthelpstr[i]);
+    exit(0);
+  }
+
+  params->infile  = gk_strdup(argv[gk_optind++]);
+
+  if (argc-gk_optind > 0) 
+    params->outfile = gk_strdup(argv[gk_optind++]);
+  else
+    params->outfile   = gk_strdup("gkgraph.out");
+
+  if (!gk_fexists(params->infile))
+    errexit("input file %s does not exist.\n", params->infile);
+
+  return params;
+}
+
diff --git a/test/gksort.c b/test/gksort.c
new file mode 100644
index 0000000..6543836
--- /dev/null
+++ b/test/gksort.c
@@ -0,0 +1,346 @@
+/*!
+\file  gksort.c
+\brief Testing module for the various sorting routines in GKlib
+
+\date   Started 4/4/2007
+\author George
+\version\verbatim $Id: gksort.c 11058 2011-11-10 00:02:50Z karypis $ \endverbatim
+*/
+
+#include <GKlib.h>
+
+/* number of elements in the stack-allocated arrays sorted by each test */
+#define N       10000
+
+/*************************************************************************/
+/*! Exercises gk_isorti() and gk_isortd() on N random ints and reports 
+    every adjacent pair that is out of order. */
+/*************************************************************************/
+void test_isort()
+{
+  gk_idx_t k;
+  int data[N];
+
+  /* ascending order */
+  printf("Testing iisort...\n");
+  for (k=0; k<N; k++) {
+    data[k] = RandomInRange(123432);
+  }
+
+  gk_isorti(N, data);
+
+  for (k=1; k<N; k++) {
+    if (data[k-1] > data[k])
+      printf("gk_isorti error at index %jd [%d %d]\n", (intmax_t)(k-1), data[k-1], data[k]);
+  }
+
+
+  /* descending order */
+  printf("Testing disort...\n");
+  for (k=0; k<N; k++) {
+    data[k] = RandomInRange(123432);
+  }
+
+  gk_isortd(N, data);
+
+  for (k=1; k<N; k++) {
+    if (data[k-1] < data[k])
+      printf("gk_isortd error at index %jd [%d %d]\n", (intmax_t)(k-1), data[k-1], data[k]);
+  }
+
+}
+
+
+/*************************************************************************/
+/*! Exercises gk_fsorti() and gk_fsortd() on N random floats and reports 
+    every adjacent pair that is out of order. */
+/*************************************************************************/
+void test_fsort()
+{
+  gk_idx_t k;
+  float data[N];
+
+  /* ascending order */
+  printf("Testing ifsort...\n");
+  for (k=0; k<N; k++) {
+    data[k] = RandomInRange(123432)/(1.0+RandomInRange(645323));
+  }
+
+  gk_fsorti(N, data);
+
+  for (k=1; k<N; k++) {
+    if (data[k-1] > data[k])
+      printf("gk_fsorti error at index %jd [%f %f]\n", (intmax_t)(k-1), data[k-1], data[k]);
+  }
+
+
+  /* descending order */
+  printf("Testing dfsort...\n");
+  for (k=0; k<N; k++) {
+    data[k] = RandomInRange(123432)/(1.0+RandomInRange(645323));
+  }
+
+  gk_fsortd(N, data);
+
+  for (k=1; k<N; k++) {
+    if (data[k-1] < data[k])
+      printf("gk_fsortd error at index %jd [%f %f]\n", (intmax_t)(k-1), data[k-1], data[k]);
+  }
+
+}
+
+
+/*************************************************************************/
+/*! Exercises gk_idxsorti() and gk_idxsortd() on N random gk_idx_t 
+    values and reports every adjacent pair that is out of order. */
+/*************************************************************************/
+void test_idxsort()
+{
+  gk_idx_t k;
+  gk_idx_t data[N];
+
+  /* ascending order */
+  printf("Testing idxsorti...\n");
+  for (k=0; k<N; k++) {
+    data[k] = RandomInRange(123432);
+  }
+
+  gk_idxsorti(N, data);
+
+  for (k=1; k<N; k++) {
+    if (data[k-1] > data[k])
+      printf("gk_idxsorti error at index %zd [%zd %zd]\n", (ssize_t)(k-1), (ssize_t)data[k-1], (ssize_t)data[k]);
+  }
+
+
+  /* descending order */
+  printf("Testing idxsortd...\n");
+  for (k=0; k<N; k++) {
+    data[k] = RandomInRange(123432);
+  }
+
+  gk_idxsortd(N, data);
+
+  for (k=1; k<N; k++) {
+    if (data[k-1] < data[k])
+      printf("gk_idxsortd error at index %zd [%zd %zd]\n", (ssize_t)(k-1), (ssize_t)data[k-1], (ssize_t)data[k]);
+  }
+
+}
+
+
+
+/*************************************************************************/
+/*! Exercises gk_ikvsorti() and gk_ikvsortd() on N key-value pairs with 
+    random int keys (the value records the original position) and 
+    reports every adjacent pair whose keys are out of order. */
+/*************************************************************************/
+void test_ikvsort()
+{
+  gk_idx_t k;
+  gk_ikv_t data[N];
+
+  /* ascending order */
+  printf("Testing ikvsorti...\n");
+  for (k=0; k<N; k++) {
+    data[k].key = RandomInRange(123432);
+    data[k].val = k;
+  }
+
+  gk_ikvsorti(N, data);
+
+  for (k=1; k<N; k++) {
+    if (data[k-1].key > data[k].key)
+      printf("gk_ikvsorti error at index %jd [%d %d] [%jd %jd]\n", (intmax_t)(k-1), data[k-1].key, data[k].key, (intmax_t)data[k-1].val, (intmax_t)data[k].val);
+  }
+
+
+  /* descending order */
+  printf("Testing ikvsortd...\n");
+  for (k=0; k<N; k++) {
+    data[k].key = RandomInRange(123432);
+    data[k].val = k;
+  }
+
+  gk_ikvsortd(N, data);
+
+  for (k=1; k<N; k++) {
+    if (data[k-1].key < data[k].key)
+      printf("gk_ikvsortd error at index %jd [%d %d] [%jd %jd]\n", (intmax_t)(k-1), data[k-1].key, data[k].key, (intmax_t)data[k-1].val, (intmax_t)data[k].val);
+  }
+
+}
+
+
+
+/*************************************************************************/
+/*! Exercises gk_fkvsorti() and gk_fkvsortd() on N key-value pairs with 
+    random float keys (the value records the original position) and 
+    reports every adjacent pair whose keys are out of order. */
+/*************************************************************************/
+void test_fkvsort()
+{
+  gk_idx_t k;
+  gk_fkv_t data[N];
+
+  /* ascending order */
+  printf("Testing fkvsorti...\n");
+  for (k=0; k<N; k++) {
+    data[k].key = RandomInRange(123432)/(1.0+RandomInRange(645323));
+    data[k].val = k;
+  }
+
+  gk_fkvsorti(N, data);
+
+  for (k=1; k<N; k++) {
+    if (data[k-1].key > data[k].key)
+      printf("gk_fkvsorti error at index %jd [%f %f] [%jd %jd]\n", (intmax_t)(k-1), data[k-1].key, data[k].key, (intmax_t)data[k-1].val, (intmax_t)data[k].val);
+  }
+
+
+  /* descending order */
+  printf("Testing fkvsortd...\n");
+  for (k=0; k<N; k++) {
+    data[k].key = RandomInRange(123432)/(1.0+RandomInRange(645323));
+    data[k].val = k;
+  }
+
+  gk_fkvsortd(N, data);
+
+  for (k=1; k<N; k++) {
+    if (data[k-1].key < data[k].key)
+      printf("gk_fkvsortd error at index %jd [%f %f] [%jd %jd]\n", (intmax_t)(k-1), data[k-1].key, data[k].key, (intmax_t)data[k-1].val, (intmax_t)data[k].val);
+  }
+
+}
+
+
+/*************************************************************************/
+/*! Testing module for gk_?dkvsort() routine */
+/*************************************************************************/
+void test_dkvsort()
+{
+  gk_idx_t pos;
+  gk_dkv_t pairs[N];
+
+  /* ascending pass: random fractional keys, values record the starting slot */
+  printf("Testing dkvsorti...\n");
+  for (pos=0; pos<N; pos++) {
+    pairs[pos].val = pos;
+    pairs[pos].key = RandomInRange(123432)/(1.0+RandomInRange(645323));
+  }
+
+  gk_dkvsorti(N, pairs);
+
+  /* report every adjacent pair whose keys decrease */
+  for (pos=1; pos<N; pos++) {
+    if (pairs[pos-1].key > pairs[pos].key)
+      printf("gk_dkvsorti error at index %jd [%lf %lf] [%jd %jd]\n", (intmax_t)(pos-1), pairs[pos-1].key, pairs[pos].key, (intmax_t)pairs[pos-1].val, (intmax_t)pairs[pos].val);
+  }
+
+  /* descending pass: refill with fresh random keys */
+  printf("Testing dkvsortd...\n");
+  for (pos=0; pos<N; pos++) {
+    pairs[pos].val = pos;
+    pairs[pos].key = RandomInRange(123432)/(1.0+RandomInRange(645323));
+  }
+
+  gk_dkvsortd(N, pairs);
+
+  /* report every adjacent pair whose keys increase */
+  for (pos=1; pos<N; pos++) {
+    if (pairs[pos-1].key < pairs[pos].key)
+      printf("gk_dkvsortd error at index %jd [%lf %lf] [%jd %jd]\n", (intmax_t)(pos-1), pairs[pos-1].key, pairs[pos].key, (intmax_t)pairs[pos-1].val, (intmax_t)pairs[pos].val);
+  }
+}
+
+
+/*************************************************************************/
+/*! Testing module for gk_?skvsort() routine */
+/*************************************************************************/
+void test_skvsort()
+{
+  gk_idx_t i;
+  gk_skv_t array[N];
+  char line[256];
+
+  /* test the increasing sort; every key is a separate copy returned by gk_strdup() */
+  printf("Testing skvsorti...\n");
+  for (i=0; i<N; i++) {
+    sprintf(line, "%d", RandomInRange(123432));
+    array[i].key = gk_strdup(line);
+    array[i].val = i;
+  }
+
+  gk_skvsorti(N, array);
+
+  for (i=0; i<N-1; i++) {
+    if (strcmp(array[i].key, array[i+1].key) > 0)
+      printf("gk_skvsorti error at index %jd [%s %s] [%jd %jd]\n", (intmax_t)i, array[i].key, array[i+1].key, (intmax_t)array[i].val, (intmax_t)array[i+1].val);
+  }
+  for (i=0; i<N; i++)  /* release the keys before the next pass overwrites the pointers */
+    gk_free((void **)&array[i].key, LTERM);
+  /* test the decreasing sort */
+  printf("Testing skvsortd...\n");
+  for (i=0; i<N; i++) {
+    sprintf(line, "%d", RandomInRange(123432));
+    array[i].key = gk_strdup(line);
+    array[i].val = i;
+  }
+
+  gk_skvsortd(N, array);
+
+  for (i=0; i<N-1; i++) {
+    if (strcmp(array[i].key, array[i+1].key) < 0)
+      printf("gk_skvsortd error at index %jd [%s %s] [%jd %jd]\n", (intmax_t)i, array[i].key, array[i+1].key, (intmax_t)array[i].val, (intmax_t)array[i+1].val);
+  }
+  for (i=0; i<N; i++)  /* release the keys of the second pass as well */
+    gk_free((void **)&array[i].key, LTERM);
+}
+
+
+/*************************************************************************/
+/*! Testing module for gk_?idxkvsort() routine */
+/*************************************************************************/
+void test_idxkvsort()
+{
+  gk_idx_t pos;
+  gk_idxkv_t pairs[N];
+
+  /* ascending pass: random keys, each value records the slot it started in */
+  printf("Testing idxkvsorti...\n");
+  for (pos=0; pos<N; pos++) {
+    pairs[pos].val = pos;
+    pairs[pos].key = RandomInRange(123432);
+  }
+
+  gk_idxkvsorti(N, pairs);
+
+  /* report every adjacent pair whose keys decrease */
+  for (pos=1; pos<N; pos++) {
+    if (pairs[pos-1].key > pairs[pos].key)
+      printf("gk_idxkvsorti error at index %zd [%zd %zd] [%zd %zd]\n", 
+          (ssize_t)(pos-1), (ssize_t)pairs[pos-1].key, (ssize_t)pairs[pos].key, 
+          (ssize_t)pairs[pos-1].val, (ssize_t)pairs[pos].val);
+  }
+
+  /* descending pass: refill with fresh random keys */
+  printf("Testing idxkvsortd...\n");
+  for (pos=0; pos<N; pos++) {
+    pairs[pos].val = pos;
+    pairs[pos].key = RandomInRange(123432);
+  }
+
+  gk_idxkvsortd(N, pairs);
+
+  /* report every adjacent pair whose keys increase */
+  for (pos=1; pos<N; pos++) {
+    if (pairs[pos-1].key < pairs[pos].key)
+      printf("gk_idxkvsortd error at index %zd [%zd %zd] [%zd %zd]\n", 
+          (ssize_t)(pos-1), (ssize_t)pairs[pos-1].key, (ssize_t)pairs[pos].key, 
+          (ssize_t)pairs[pos-1].val, (ssize_t)pairs[pos].val);
+  }
+}
+
+
+
+
+int main()  /* driver: runs each sort self-test once, in a fixed order */
+{
+  test_isort();
+  test_fsort();
+  test_idxsort();
+  /* key-value sorts: each test below prints one line per out-of-order pair */
+  test_ikvsort();
+  test_fkvsort();
+  test_dkvsort();
+  test_skvsort();
+  test_idxkvsort();
+}
+
diff --git a/test/gkuniq.c b/test/gkuniq.c
new file mode 100644
index 0000000..0b4bf68
--- /dev/null
+++ b/test/gkuniq.c
@@ -0,0 +1,268 @@
+/*!
+\file  
+\brief A program to test various implementations for unique.
+
+\date 10/8/2020
+\author George
+*/
+
+#include <GKlib.h>
+
+/*************************************************************************/
+/*! Data structures for the code */
+/*************************************************************************/
+typedef struct {
+  ssize_t length, dupfactor;  /* base-array length; number of times that array is replicated */
+} params_t;
+
+/*************************************************************************/
+/*! Constants */
+/*************************************************************************/
+#define CMD_HELP        10
+
+
+/*************************************************************************/
+/*! Local variables */
+/*************************************************************************/
+static struct gk_option long_options[] = {  /* table handed to gk_getopt_long_only() in parse_cmdline() */
+  {"help",          0,      0,      CMD_HELP},  /* CMD_HELP is the code parse_cmdline() switches on */
+  {0,               0,      0,      0}  /* all-zero row; presumably the end-of-table marker */
+};
+
+
+/*-------------------------------------------------------------------*/
+/* Mini help  */
+/*-------------------------------------------------------------------*/
+static char helpstr[][100] = {  /* one help line per row; each row is a 100-byte buffer */
+" ",
+"Usage: gkuniq length dupfactor",
+" ",
+" Required parameters",
+"  length",
+"     The length of the base array.",
+" ",
+"  dupfactor",
+"     The number of times the initial array is replicated.",
+" ",
+" Optional parameters",
+"  -help",
+"     Prints this message.",
+""  /* empty string: ends the strlen()-driven print loops in parse_cmdline() */
+};
+
+
+
+/*************************************************************************/
+/*! Function prototypes */
+/*************************************************************************/
+params_t *parse_cmdline(int argc, char *argv[]);
+int unique_v1(int n, int *input, int *output);
+int unique_v2(int n, int *input, int *output);
+int unique_v3(int n, int *input, int *output, int *r_maxsize, int **r_hmap);
+void mem_flush(const void *p, unsigned int allocation_size);
+
+/*************************************************************************/
+/*! A function to flush the cache associated with an array */
+/**************************************************************************/
+void mem_flush(const void *p, unsigned int allocation_size)
+{
+#if !defined(NO_X86) && (defined(__i386__) || defined(__x86_64__))  /* no-op on other targets */
+  const size_t cache_line = 64;  /* stride between flushed addresses, in bytes */
+  const char *cp = (const char *)p;
+  size_t i = 0;
+
+  if (p == NULL || allocation_size == 0)  /* nothing to flush */
+    return;
+
+  for (i = 0; i < allocation_size; i += cache_line) {  /* one clflush per 64-byte step */
+    __asm__ volatile("clflush (%0)\n\t"
+                 :
+                 : "r"(&cp[i])
+                 : "memory");
+  }
+
+  __asm__ volatile("sfence\n\t"  /* fence issued once, after all the flushes */
+                :
+                :
+                : "memory");
+#endif
+}
+
+/*************************************************************************/
+/*! the entry point */
+/**************************************************************************/
+int main(int argc, char *argv[])
+{
+  int i, j, k;
+  params_t *params;
+  double tmr;
+  int n, nunique, *input, *output;
+  int maxsize=0, *hmap=NULL;  /* hash table kept between the two unique_v3() calls */
+
+  params = parse_cmdline(argc, argv);
+
+  /* create the input data: position i of every replica holds the same random value */
+  n = params->length*params->dupfactor;
+  input  = gk_imalloc(n, "input");
+  output = gk_imalloc(n, "output");
+  for (i=0; i<params->length; i++) {
+    k = RandomInRange(n);
+    for (j=0; j<params->dupfactor; j++)
+      input[j*params->length+i] = k;
+  }
+
+  gk_clearwctimer(tmr);
+  gk_startwctimer(tmr);
+  mem_flush(input, n*sizeof(int));
+  mem_flush(output, n*sizeof(int));
+  nunique = unique_v1(n, input, output);  /* sort-based; it calls gk_isorti() on input[] */
+  gk_stopwctimer(tmr);
+  printf(" V1: nunique: %d, timer: %.5lf\n", nunique, gk_getwctimer(tmr));
+
+  gk_clearwctimer(tmr);
+  gk_startwctimer(tmr);
+  mem_flush(input, n*sizeof(int));
+  mem_flush(output, n*sizeof(int));
+  nunique = unique_v2(n, input, output);  /* hash table allocated and freed inside the call */
+  gk_stopwctimer(tmr);
+  printf(" V2: nunique: %d, timer: %.5lf\n", nunique, gk_getwctimer(tmr));
+
+  gk_clearwctimer(tmr);
+  gk_startwctimer(tmr);
+  mem_flush(input, n*sizeof(int));
+  mem_flush(output, n*sizeof(int));
+  nunique = unique_v3(n, input, output, &maxsize, &hmap);  /* maxsize is 0 here, so the table is allocated */
+  gk_stopwctimer(tmr);
+  printf("V3c: nunique: %d, timer: %.5lf\n", nunique, gk_getwctimer(tmr));
+
+  gk_clearwctimer(tmr);
+  gk_startwctimer(tmr);
+  mem_flush(input, n*sizeof(int));
+  mem_flush(output, n*sizeof(int));
+  nunique = unique_v3(n, input, output, &maxsize, &hmap);  /* same n, so the cached table is reused */
+  gk_stopwctimer(tmr);
+  printf("V3w: nunique: %d, timer: %.5lf\n", nunique, gk_getwctimer(tmr));
+
+  gk_free((void **)&input, &output, &hmap, &params, LTERM);  /* params came from parse_cmdline() */
+  return 0;
+}
+
+
+
+/*************************************************************************/
+/*! This is the entry point of the command-line argument parser */
+/*************************************************************************/
+params_t *parse_cmdline(int argc, char *argv[])
+{
+  int i, c, option_index;
+  params_t *params;
+
+  params = (params_t *)gk_malloc(sizeof(params_t), "parse_cmdline: params");
+  /* Parse the command line arguments; -help is the only option in long_options */
+  while ((c = gk_getopt_long_only(argc, argv, "", long_options, &option_index)) != -1) {
+    switch (c) {
+      case CMD_HELP:
+        for (i=0; strlen(helpstr[i]) > 0; i++)
+          printf("%s\n", helpstr[i]);
+        exit(0);
+        break;
+      case '?':
+      default:
+        printf("Illegal command-line option(s)\nUse %s -help for a summary of the options.\n", argv[0]);
+        exit(0);
+    }
+  }
+
+  if (argc-gk_optind != 2) {  /* exactly two positionals are required: length dupfactor */
+    printf("Unrecognized parameters.");
+    for (i=0; strlen(helpstr[i]) > 0; i++)
+      printf("%s\n", helpstr[i]);
+    exit(0);
+  }
+
+  params->length    = atoi(argv[gk_optind++]);
+  params->dupfactor = atoi(argv[gk_optind++]);
+  if (params->length < 1 || params->dupfactor < 1)  /* atoi() yields 0 for non-numeric text */
+    errexit("length and dupfactor must be positive integers.\n");
+
+  return params;
+}
+
+
+/*************************************************************************/
+/*! gklib-sort based approach */
+/*************************************************************************/
+int unique_v1(int n, int *input, int *output)
+{
+  int i, j;
+
+  if (n <= 0) return 0;  /* empty input: do not touch input[0]/output[0] */
+  gk_isorti(n, input);   /* sorting puts equal values next to each other */
+  output[0] = input[0];
+  for (j=0, i=1; i<n; i++) {
+    if (output[j] != input[i])   /* differs from the last value kept */
+      output[++j] = input[i];
+  }
+  return j+1;  /* number of distinct values written to output[] */
+}
+
+
+/*************************************************************************/
+/*! hash-table based approach */
+/*************************************************************************/
+int unique_v2(int n, int *input, int *output)
+{
+  int pos, slot, key, count = 0, capacity = 1, wrap;
+  int *table;
+
+  /* capacity: smallest power of two that is not below 2*n (minimum 1) */
+  while (capacity < 2*n) capacity *= 2;
+  wrap  = capacity-1;
+  table = gk_ismalloc(capacity, -1, "hmap");
+
+  for (pos=0; pos<n; pos++) {
+    key  = input[pos];
+    slot = key&wrap;  /* linear probing; a slot holding -1 is treated as free */
+    while (table[slot] != -1 && table[slot] != key)
+      slot = (slot+1)&wrap;
+    if (table[slot] != -1) continue;  /* key already recorded */
+    table[slot] = key;
+    output[count++] = key;  /* first occurrence: keep it, in input order */
+  }
+  gk_free((void **)&table, LTERM);
+  return count;
+}
+
+
+/*************************************************************************/
+/*! hash-table based approach, where the htable is most likely pre-allocated */
+/*************************************************************************/
+int unique_v3(int n, int *input, int *output, int *r_maxsize, int **r_hmap)
+{
+  int pos, slot, key, count = 0, capacity = 1, wrap;
+  int *table;
+
+  /* capacity: smallest power of two that is not below 2*n (minimum 1) */
+  while (capacity < 2*n) capacity *= 2;
+  wrap = capacity-1;
+  if (capacity <= *r_maxsize) {  /* cached table is large enough: reset the part in use */
+    table = *r_hmap;
+    gk_iset(capacity, -1, table);
+  }
+  else {  /* too small (or absent): replace it and remember the new size */
+    gk_free((void **)r_hmap, LTERM);
+    table = *r_hmap = gk_ismalloc(capacity, -1, "hmap");
+    *r_maxsize = capacity;
+  }
+
+  for (pos=0; pos<n; pos++) {
+    key  = input[pos];
+    slot = key&wrap;  /* linear probing; a slot holding -1 is treated as free */
+    while (table[slot] != -1 && table[slot] != key)
+      slot = (slot+1)&wrap;
+    if (table[slot] != -1) continue;  /* key already recorded */
+    table[slot] = key;
+    output[count++] = key;  /* first occurrence: keep it, in input order */
+  }
+  return count;  /* the table stays allocated for the caller to reuse or free */
+}
diff --git a/test/grKx.c b/test/grKx.c
new file mode 100644
index 0000000..a72b580
--- /dev/null
+++ b/test/grKx.c
@@ -0,0 +1,256 @@
+/*!
+\file  
+\brief A simple program to create multiple copies of an input matrix.
+
+\date 5/30/2013
+\author George
+\version \verbatim $Id: grKx.c 17699 2014-09-27 18:05:31Z karypis $ \endverbatim
+*/
+
+#include <GKlib.h>
+
+/*************************************************************************/
+/*! Data structures for the code */
+/*************************************************************************/
+typedef struct {
+  int inf, outf;    /* format codes of the input and output files (the <inf>/<outf> arguments) */
+  int numbering;    /* input numbering (output when applicable) */
+  int readvals;     /* input values (output when applicable) */
+  int writevals;    /* output values */
+  int rshuf, cshuf; /* random shuffle of rows/columns */
+  int symmetric;    /* a symmetric shuffle */
+  int ncopies;      /* the copies of the graph to create */
+  char *infile;     /* input file */
+  char *outfile;    /* output file */
+} params_t;
+
+
+/*************************************************************************/
+/*! Constants */
+/*************************************************************************/
+#define CMD_NUMONE        1
+#define CMD_NOREADVALS    2
+#define CMD_NOWRITEVALS   3
+#define CMD_RSHUF         4
+#define CMD_CSHUF         5
+#define CMD_SYMMETRIC     6
+#define CMD_HELP          100
+
+
+/*************************************************************************/
+/*! Local variables */
+/*************************************************************************/
+static struct gk_option long_options[] = {  /* table handed to gk_getopt_long_only() in parse_cmdline() */
+  {"numone",      0,      0,      CMD_NUMONE},  /* last column: the code parse_cmdline() switches on */
+  {"noreadvals",  0,      0,      CMD_NOREADVALS},
+  {"nowritevals", 0,      0,      CMD_NOWRITEVALS},
+  {"rshuf",       0,      0,      CMD_RSHUF},
+  {"cshuf",       0,      0,      CMD_CSHUF},
+  {"symmetric",   0,      0,      CMD_SYMMETRIC},
+  {"help",        0,      0,      CMD_HELP},
+  {0,             0,      0,      0}  /* all-zero row; presumably the end-of-table marker */
+};
+
+
+/*-------------------------------------------------------------------*/
+/* Mini help  */
+/*-------------------------------------------------------------------*/
+static char helpstr[][100] = {  /* full help text, printed for -help */
+" ",
+"Usage: grKx [options] <infile> <inf> <outfile> <outf> <ncopies>",
+" ",
+" Required parameters",
+"  infile, outfile",
+"     The name of the input/output CSR file.",
+" ",
+"  inf/outf",
+"     The format of the input/output file.",
+"     Supported values are:",
+"        1  GK_CSR_FMT_CLUTO",
+"        2  GK_CSR_FMT_CSR",
+"        3  GK_CSR_FMT_METIS",
+"        4  GK_CSR_FMT_BINROW",
+"        6  GK_CSR_FMT_IJV",
+"        7  GK_CSR_FMT_BIJV",
+" ",
+" Optional parameters",
+"  -numone",
+"     Specifies that the numbering of the input file starts from 1. ",
+"     It only applies to CSR/IJV formats.",
+" ",
+"  -nowritevals",
+"     Specifies that no values will be output.",
+" ",
+"  -noreadvals",
+"     Specifies that the values will not be read when applicable.",
+" ",
+"  -rshuf",
+"     Specifies that the rows will be randomly shuffled prior to output.",
+" ",
+"  -cshuf",
+"     Specifies that the columns will be randomly shuffled prior to output.",
+" ",
+"  -symmetric",
+"     Specifies that the row+column shuffling will be symmetric.",
+" ",
+"  -help",
+"     Prints this message.",
+""  /* empty string: ends the strlen()-driven print loop */
+};
+
+static char shorthelpstr[][100] = {  /* printed when the positional-argument count is wrong */
+" ",
+"   Usage: grKx [options] <infile> <inf> <outfile> <outf> <ncopies>",
+"          use 'grKx -help' for a summary of the options.",
+""  /* empty string: ends the strlen()-driven print loop */
+};
+ 
+
+/*************************************************************************/
+/*! This is the entry point of the command-line argument parser */
+/*************************************************************************/
+params_t *parse_cmdline(int argc, char *argv[])
+{
+  int i;
+  int c, option_index;
+  params_t *params;
+
+  params = (params_t *)gk_malloc(sizeof(params_t), "parse_cmdline: params");
+
+  /* initialize the params data structure */
+  params->numbering = 0;
+  params->readvals  = 1;
+  params->writevals = 1;
+  params->rshuf     = 0;
+  params->cshuf     = 0;
+  params->symmetric = 0;
+
+  params->inf       = -1;
+  params->outf      = -1;
+  params->infile    = NULL;
+  params->outfile   = NULL;
+
+  /* Parse the command line arguments; every option here is a plain on/off flag */
+  while ((c = gk_getopt_long_only(argc, argv, "", long_options, &option_index)) != -1) {
+    switch (c) {
+      case CMD_NUMONE:
+        params->numbering = 1;
+        break;
+      case CMD_NOREADVALS:
+        params->readvals = 0;
+        break;
+      case CMD_NOWRITEVALS:
+        params->writevals = 0;
+        break;
+      case CMD_RSHUF:
+        params->rshuf = 1;
+        break;
+      case CMD_CSHUF:
+        params->cshuf = 1;
+        break;
+      case CMD_SYMMETRIC:
+        params->symmetric = 1;
+        break;
+      case CMD_HELP:
+        for (i=0; strlen(helpstr[i]) > 0; i++)
+          printf("%s\n", helpstr[i]);
+        exit(0);
+        break;
+      case '?':
+      default:
+        printf("Illegal command-line option(s)\nUse %s -help for a summary of the options.\n", argv[0]);
+        exit(0);
+    }
+  }
+
+  if (argc-gk_optind != 5) {  /* <infile> <inf> <outfile> <outf> <ncopies> */
+    printf("Unrecognized parameters.");
+    for (i=0; strlen(shorthelpstr[i]) > 0; i++)
+      printf("%s\n", shorthelpstr[i]);
+    exit(0);
+  }
+
+  params->infile  = gk_strdup(argv[gk_optind++]);
+  params->inf     = atoi(argv[gk_optind++]);
+  params->outfile = gk_strdup(argv[gk_optind++]);
+  params->outf    = atoi(argv[gk_optind++]);
+  params->ncopies = atoi(argv[gk_optind++]);
+  if (params->ncopies < 1)  /* atoi() yields 0 for non-numeric text; main() sizes its arrays from this */
+    errexit("ncopies must be at least 1.\n");
+
+  if (!gk_fexists(params->infile))
+    errexit("input file %s does not exist.\n", params->infile);
+
+  return params;
+}
+
+
+/*************************************************************************/
+/*! the entry point */
+/**************************************************************************/
+int main(int argc, char *argv[])
+{
+  ssize_t row, nz, copy, total, nrows, ncols, ncopies;
+  int what;
+  params_t *params;
+  gk_csr_t *mat, *kmat, *smat;
+
+  /* parse the command line, then load the input matrix */
+  params = parse_cmdline(argc, argv);
+  mat    = gk_csr_Read(params->infile, params->inf, params->readvals, params->numbering);
+
+  /* input dimensions and the nonzero count of the replicated matrix */
+  ncopies = params->ncopies;
+  nrows   = mat->nrows;
+  ncols   = mat->ncols;
+  total   = mat->rowptr[nrows]*ncopies;
+
+  /* allocate the result; a value array is created only if the input has one */
+  kmat         = gk_csr_Create();
+  kmat->nrows  = nrows*ncopies;
+  kmat->ncols  = ncols*ncopies;
+  kmat->rowptr = gk_zmalloc(kmat->nrows+1, "rowptr");
+  kmat->rowind = gk_imalloc(total, "rowind");
+  if (mat->rowval)
+    kmat->rowval = gk_fmalloc(total, "rowval");
+
+  /* copy number `copy` fills rows copy*nrows.. and shifts column indices by
+     copy*ncols; `total` now counts the nonzeros written so far */
+  total = 0;
+  kmat->rowptr[0] = 0;
+  for (copy=0; copy<ncopies; copy++) {
+    for (row=0; row<nrows; row++) {
+      for (nz=mat->rowptr[row]; nz<mat->rowptr[row+1]; nz++) {
+        kmat->rowind[total] = mat->rowind[nz] + copy*ncols;
+        if (mat->rowval)
+          kmat->rowval[total] = mat->rowval[nz];
+        total++;
+      }
+      kmat->rowptr[copy*nrows+row+1] = total;
+    }
+  }
+
+  /* the replicated matrix replaces the input from here on */
+  gk_csr_Free(&mat);
+  mat = kmat;
+
+  /* optional shuffle of the rows, the columns, or both */
+  if (params->rshuf || params->cshuf) {
+    what = (params->rshuf && params->cshuf ? GK_CSR_ROWCOL :
+            (params->rshuf ? GK_CSR_ROW : GK_CSR_COL));
+
+    smat = gk_csr_Shuffle(mat, what, params->symmetric);
+    gk_csr_Free(&mat);
+    mat = smat;
+  }
+
+  /* values requested but absent: allocate them via gk_fsmalloc() with fill value 1.0 */
+  if (params->writevals && mat->rowval == NULL) 
+    mat->rowval = gk_fsmalloc(mat->rowptr[mat->nrows], 1.0, "mat->rowval");
+
+  gk_csr_Write(mat, params->outfile, params->outf, params->writevals, 0);
+
+  gk_csr_Free(&mat);
+
+}
+
diff --git a/test/m2mnbrs.c b/test/m2mnbrs.c
new file mode 100644
index 0000000..53f35ca
--- /dev/null
+++ b/test/m2mnbrs.c
@@ -0,0 +1,304 @@
+/*!
+\file  
+\brief It takes as input two CSR matrices and finds for each row of the 
+       first matrix the most similar rows in the second matrix.
+
+\date 9/27/2014
+\author George
+\version \verbatim $Id: m2mnbrs.c 17699 2014-09-27 18:05:31Z karypis $ \endverbatim
+*/
+
+#include <GKlib.h>
+
+/*************************************************************************/
+/*! Data structures for the code */
+/*************************************************************************/
+typedef struct {
+  int simtype;             /*!< The similarity type to use */
+  int nnbrs;               /*!< The maximum number of nearest neighbors to output */
+  float minsim;            /*!< The minimum similarity to use for keeping neighbors */
+
+  int verbosity;           /*!< The reporting verbosity level */
+
+  char *qfile;             /*!< The file storing the query documents */
+  char *cfile;             /*!< The file storing the collection documents */
+  char *outfile;           /*!< The file where the output will be stored */
+
+  /* timers */
+  double timer_global;     /*!< Wall-clock time of the FindNeighbors() call made by main() */
+  double timer_1;          /*!< Wall-clock time of the query loop inside FindNeighbors() */
+  double timer_2;          /*!< Cleared and printed by main(); never started in this file's visible code */
+  double timer_3;          /*!< Same as timer_2 */
+  double timer_4;          /*!< Same as timer_2 */
+} params_t;
+
+
+/*************************************************************************/
+/*! Constants */
+/*************************************************************************/
+/* Versions */
+#define VER_MAJOR           0
+#define VER_MINOR           1
+#define VER_SUBMINOR        0
+
+/* Command-line option codes */
+#define CMD_SIMTYPE         10
+#define CMD_NNBRS           20
+#define CMD_MINSIM          22
+#define CMD_VERBOSITY       70
+#define CMD_HELP            100
+
+/* The text labels for the different simtypes */
+static char simtypenames[][10] = {"", "dotp", "cos", "jac", ""};
+
+
+
+/*************************************************************************/
+/*! Local variables */
+/*************************************************************************/
+static struct gk_option long_options[] = {  /* table handed to gk_getopt_long_only() in parse_cmdline() */
+  {"simtype",           1,      0,      CMD_SIMTYPE},  /* rows with 1 here are read through gk_optarg */
+  {"nnbrs",             1,      0,      CMD_NNBRS},
+  {"minsim",            1,      0,      CMD_MINSIM},
+  {"verbosity",         1,      0,      CMD_VERBOSITY},
+
+  {"help",              0,      0,      CMD_HELP},
+  {0,                   0,      0,      0}  /* all-zero row; presumably the end-of-table marker */
+};
+
+static gk_StringMap_t simtype_options[] = {  /* -simtype text -> code, looked up with gk_GetStringID() */
+  {"cos",                GK_CSR_COS},
+  {"jac",                GK_CSR_JAC},
+  {NULL,                 0}  /* NOTE(review): simtypenames also lists "dotp", which has no entry here */
+};
+
+
+/*-------------------------------------------------------------------
+ * Mini help
+ *-------------------------------------------------------------------*/
+static char helpstr[][100] =
+{
+" ",
+"Usage: m2mnbrs [options] qfile cfile [outfile]",
+" ",
+" Options",
+"  -simtype=string",
+"     Specifies the type of similarity to use. Possible values are:",
+"       cos   - Cosine similarity",
+"       jac   - Jaccard similarity [default]", 
+" ",
+"  -nnbrs=int",
+"     Specifies the maximum number of nearest neighbors.",
+"     A value of -1 indicates that all neighbors will be considered.",
+"     Default value is 100.",
+" ",
+"  -minsim=float",
+"     The minimum allowed similarity between neighbors. ",
+"     Default value is .25.",
+" ",
+"  -verbosity=int",
+"     Specifies the level of debugging information to be displayed.",
+"     Default value is 0.",
+" ",
+"  -help",
+"     Prints this message.",
+""  /* empty string: ends the strlen()-driven print loop in parse_cmdline() */
+};
+
+
+
+/*************************************************************************/
+/*! Function prototypes */
+/*************************************************************************/
+params_t *parse_cmdline(int argc, char *argv[]);
+void FindNeighbors(params_t *params, gk_csr_t *qmat, gk_csr_t *cmat);
+
+
+/*************************************************************************/
+/*! This is the entry point of the command-line argument parser */
+/*************************************************************************/
+params_t *parse_cmdline(int argc, char *argv[])
+{
+  int i;
+  int c, option_index;
+  params_t *params;
+
+  params = (params_t *)gk_malloc(sizeof(params_t), "parse_cmdline: params");
+
+  /* initialize the params data structure (the timer fields are cleared later by main) */
+  params->simtype   = GK_CSR_JAC;
+  params->nnbrs     = 100;
+  params->minsim    = .25;
+  params->verbosity = -1;
+  params->qfile     = NULL;
+  params->cfile     = NULL;
+  params->outfile   = NULL;
+
+
+  /* Parse the command line arguments  */
+  while ((c = gk_getopt_long_only(argc, argv, "", long_options, &option_index)) != -1) {
+    switch (c) {
+      case CMD_SIMTYPE:
+        if (gk_optarg) {
+          if ((params->simtype = gk_GetStringID(simtype_options, gk_optarg)) == -1)
+            errexit("Invalid simtype of %s.\n", gk_optarg);
+        }
+        break;
+
+      case CMD_NNBRS:
+        if (gk_optarg) params->nnbrs = atoi(gk_optarg);
+        break;
+
+      case CMD_MINSIM:
+        if (gk_optarg) params->minsim = atof(gk_optarg);
+        break;
+
+      case CMD_VERBOSITY:
+        if (gk_optarg) params->verbosity = atoi(gk_optarg);
+        break;
+
+      case CMD_HELP:
+        for (i=0; strlen(helpstr[i]) > 0; i++)
+          printf("%s\n", helpstr[i]);
+        exit(EXIT_SUCCESS);
+        break;
+
+      case '?':
+      default:
+        printf("Illegal command-line option(s)\nUse %s -help for a summary of the options.\n", argv[0]);
+        exit(EXIT_FAILURE);
+    }
+  }
+
+  /* Get the input/output file info: qfile and cfile are mandatory, outfile is optional */
+  if (argc-gk_optind < 2) {
+    printf("Missing input/output file info.\n  Use %s -help for a summary of the options.\n", argv[0]);
+    exit(EXIT_FAILURE);
+  }
+
+  params->qfile   = gk_strdup(argv[gk_optind++]);
+  params->cfile   = gk_strdup(argv[gk_optind++]);
+  params->outfile = (gk_optind < argc ? gk_strdup(argv[gk_optind++]) : NULL);
+
+  if (!gk_fexists(params->qfile))
+    errexit("input file %s does not exist.\n", params->qfile);
+  if (!gk_fexists(params->cfile))
+    errexit("input file %s does not exist.\n", params->cfile);
+
+  return params;
+}
+
+
+/*************************************************************************/
+/*! This is the entry point of the program */
+/**************************************************************************/
+int main(int argc, char *argv[])
+{
+  static const char rule[] = "********************************************************************************\n";
+  params_t *opts;
+  gk_csr_t *queries, *collection;
+
+  opts = parse_cmdline(argc, argv);
+
+  /* both inputs are loaded with the GK_CSR_FMT_CSR format code */
+  queries    = gk_csr_Read(opts->qfile, GK_CSR_FMT_CSR, 1, 0);
+  collection = gk_csr_Read(opts->cfile, GK_CSR_FMT_CSR, 1, 0);
+
+  /* banner: version, settings and the size of each matrix */
+  printf("%s", rule);
+  printf("sd (%d.%d.%d) Copyright 2014, GK.\n", VER_MAJOR, VER_MINOR, VER_SUBMINOR);
+  printf("  simtype=%s, nnbrs=%d, minsim=%.2f\n",
+      simtypenames[opts->simtype], opts->nnbrs, opts->minsim);
+  printf("  qfile=%s, nrows=%d, ncols=%d, nnz=%zd\n",
+      opts->qfile, queries->nrows, queries->ncols, queries->rowptr[queries->nrows]);
+  printf("  cfile=%s, nrows=%d, ncols=%d, nnz=%zd\n",
+      opts->cfile, collection->nrows, collection->ncols, collection->rowptr[collection->nrows]);
+
+  /* reset every timer, then time the whole search with timer_global */
+  gk_clearwctimer(opts->timer_global);
+  gk_clearwctimer(opts->timer_1);
+  gk_clearwctimer(opts->timer_2);
+  gk_clearwctimer(opts->timer_3);
+  gk_clearwctimer(opts->timer_4);
+
+  gk_startwctimer(opts->timer_global);
+  FindNeighbors(opts, queries, collection);
+  gk_stopwctimer(opts->timer_global);
+
+  printf("    wclock: %.2lfs\n", gk_getwctimer(opts->timer_global));
+  printf("    timer1: %.2lfs\n", gk_getwctimer(opts->timer_1));
+  printf("    timer2: %.2lfs\n", gk_getwctimer(opts->timer_2));
+  printf("    timer3: %.2lfs\n", gk_getwctimer(opts->timer_3));
+  printf("    timer4: %.2lfs\n", gk_getwctimer(opts->timer_4));
+  printf("%s", rule);
+
+  gk_csr_Free(&queries);
+  gk_csr_Free(&collection);
+
+  exit(EXIT_SUCCESS);
+}
+
+
+/*************************************************************************/
+/*! Computes the most similar collection rows for each query document and,
+    when an output file was given, writes them to it */
+/**************************************************************************/
+void FindNeighbors(params_t *params, gk_csr_t *qmat, gk_csr_t *cmat)
+{
+  int q, h, found;
+  int32_t *marker;
+  gk_fkv_t *hits, *cand;
+  FILE *out = NULL;
+
+  GKASSERT(qmat->ncols <= cmat->ncols);
+
+  /* for cosine similarity both matrices get their rows normalized first */
+  if (params->simtype == GK_CSR_COS) {
+    gk_csr_Normalize(qmat, GK_CSR_ROW, 2);
+    gk_csr_Normalize(cmat, GK_CSR_ROW, 2);
+  }
+
+  /* prepare the collection: column index, then squared row norms */
+  gk_csr_CreateIndex(cmat, GK_CSR_COL);
+
+  gk_csr_ComputeSquaredNorms(cmat, GK_CSR_ROW);
+
+  /* results are written only when an output file was named */
+  if (params->outfile)
+    out = gk_fopen(params->outfile, "w", "FindNeighbors: fpout");
+
+  /* working arrays, each with one entry per collection row */
+  hits   = gk_fkvmalloc(cmat->nrows, "FindNeighbors: hits");
+  marker = gk_i32smalloc(cmat->nrows, -1, "FindNeighbors: marker");
+  cand   = gk_fkvmalloc(cmat->nrows, "FindNeighbors: cand");
+
+  /* timer_1 covers the whole query loop */
+  gk_startwctimer(params->timer_1);
+  for (q=0; q<qmat->nrows; q++) {
+    if (params->verbosity > 0)
+      printf("Working on query %7d\n", q);
+
+    /* hand row q of qmat (length, indices, values) to the similarity search */
+    found = gk_csr_GetSimilarRows(cmat, 
+                 qmat->rowptr[q+1]-qmat->rowptr[q], 
+                 qmat->rowind+qmat->rowptr[q], 
+                 qmat->rowval+qmat->rowptr[q], 
+                 params->simtype, params->nnbrs, params->minsim, 
+                 hits, marker, cand);
+
+    if (out == NULL)
+      continue;
+
+    /* one line per hit: query row, hits[].val, hits[].key */
+    for (h=0; h<found; h++) 
+      fprintf(out, "%8d %8zd %.3f\n", q, hits[h].val, hits[h].key);
+  }
+  gk_stopwctimer(params->timer_1);
+
+  /* cleanup and exit */
+  if (out)
+    gk_fclose(out);
+
+  gk_free((void **)&hits, &marker, &cand, LTERM);
+}
+
diff --git a/test/rw.c b/test/rw.c
new file mode 100644
index 0000000..1a3295e
--- /dev/null
+++ b/test/rw.c
@@ -0,0 +1,306 @@
+/*!
+\file  
+\brief A simple (personalized) random walk program to test GKlib's routines
+
+\date 6/12/2008
+\author George
+\version \verbatim $Id$ \endverbatim
+*/
+
+#include <GKlib.h>
+
+/*************************************************************************/
+/*! Data structures for the code */
+/*************************************************************************/
+typedef struct {
+  int niter;      /* value of -niter; handed to gk_rw_PageRank() as the iteration limit */
+  int ntvs;       /* value of -ntvs; number of random restart vectors, -1 when not given */
+  int ppr;        /* value of -ppr; 1-based source vertex of the personalized run, -1 when not given */
+  float eps;      /* value of -eps; error tolerance handed to gk_rw_PageRank() */
+  float lamda;    /* value of -lamda; follow-the-adjacent-links probability */
+  char *infile;   /* first positional argument: the graph file */
+  char *outfile;  /* second positional argument: the file the scores are written to */
+} params_t;
+
+/*************************************************************************/
+/*! Constants */
+/*************************************************************************/
+#define CMD_NITER       1    /* code returned by the option parser for -niter */
+#define CMD_EPS         2    /* ... for -eps */
+#define CMD_LAMDA       3    /* ... for -lamda */
+#define CMD_PPR         4    /* ... for -ppr */
+#define CMD_NTVS        5    /* ... for -ntvs */
+#define CMD_HELP        10   /* ... for -help */
+
+
+/*************************************************************************/
+/*! Local variables */
+/*************************************************************************/
+static struct gk_option long_options[] = {  /* presumably {name, has_arg, flag, val} as in getopt's struct option -- confirm in gk_getopt.h */
+  {"niter",      1,      0,      CMD_NITER},
+  {"lamda",      1,      0,      CMD_LAMDA},
+  {"eps",        1,      0,      CMD_EPS},
+  {"ppr",        1,      0,      CMD_PPR},
+  {"ntvs",       1,      0,      CMD_NTVS},
+  {"help",       0,      0,      CMD_HELP},
+  {0,            0,      0,      0}          /* all-zero entry; NOTE(review): assumed to terminate the table as in getopt_long */
+};
+
+
+/*-------------------------------------------------------------------*/
+/* Full help text printed by -help; the empty string ends the list   */
+/*-------------------------------------------------------------------*/
+static char helpstr[][100] = {
+" ",
+"Usage: rw [options] <graph-file> <out-file>",
+" ",
+" Required parameters",
+"  graph-file",
+"     The name of the file storing the graph. The file is in ",
+"     Metis' graph format.",
+" ",
+" Optional parameters",
+"  -niter=int",
+"     Specifies the maximum number of iterations. [default: 100]",
+" ",
+"  -lamda=float",
+"     Specifies the follow-the-adjacent-links probability. [default: 0.80]",
+" ",
+"  -eps=float",
+"     Specifies the error tolerance. [default: 1e-10]",
+" ",
+"  -ppr=int",
+"     Specifies the source of the personalized PR. [default: -1]",
+" ",
+"  -ntvs=int",
+"     Specifies the number of test-vectors to compute. [default: -1]",
+" ",
+"  -help",
+"     Prints this message.",
+""
+};
+
+static char shorthelpstr[][100] = {  /* short usage text; consumers stop at the empty string */
+" ",
+"   Usage: rw [options] <graph-file> <out-file>",
+"          use 'rw -help' for a summary of the options.",
+""
+};
+ 
+
+
+/*************************************************************************/
+/*! Function prototypes */
+/*************************************************************************/
+void print_init_info(params_t *params, gk_csr_t *mat);  /* prints the start-up banner, matrix sizes and options */
+void print_final_info(params_t *params);                /* prints the memory-usage summary */
+params_t *parse_cmdline(int argc, char *argv[]);        /* builds a params_t from the command line */
+
+
+/*************************************************************************/
+/*! The entry point: runs gk_rw_PageRank() in the mode selected by -ntvs/-ppr and writes the scores to the output file */
+/**************************************************************************/
+int main(int argc, char *argv[])
+{
+  ssize_t i, j, niter;
+  params_t *params;
+  gk_csr_t *mat;
+  FILE *fpout;
+ 
+  /* get command-line options */
+  params = parse_cmdline(argc, argv);
+
+  /* read the data */
+  mat = gk_csr_Read(params->infile, GK_CSR_FMT_METIS, 1, 1);
+
+  /* display some basic stats */
+  print_init_info(params, mat);
+
+
+  if (params->ntvs != -1) {
+    /* compute the pr for different randomly generated restart-distribution vectors */
+    float **prs;
+
+    prs = gk_fAllocMatrix(params->ntvs, mat->nrows, 0.0, "main: prs");
+
+    /* fill each vector with random values, scale it to sum to 1 and run PageRank from it */
+    for (j=0; j<params->ntvs; j++) {
+      for (i=0; i<mat->nrows; i++)
+        prs[j][i] = RandomInRange(931);
+      gk_fscale(mat->nrows, 1.0/gk_fsum(mat->nrows, prs[j], 1), prs[j], 1);
+
+      niter = gk_rw_PageRank(mat, params->lamda, params->eps, params->niter, prs[j]);
+      printf("tvs#: %zd; niters: %zd\n", j, niter);
+    }
+
+    /* output the computed pr scores: one line per vertex, one column per vector */
+    fpout = gk_fopen(params->outfile, "w", "main: outfile");
+    for (i=0; i<mat->nrows; i++) {
+      for (j=0; j<params->ntvs; j++) 
+        fprintf(fpout, "%.4e ", prs[j][i]);
+      fprintf(fpout, "\n");
+    }
+    gk_fclose(fpout);
+
+    gk_fFreeMatrix(&prs, params->ntvs, mat->nrows);
+  }
+  else if (params->ppr != -1) {
+    /* compute the personalized pr from the specified (1-based) vertex */
+    float *pr;
+    if (params->ppr < 1 || params->ppr > mat->nrows)  /* guards the pr[ppr-1] write below */
+      errexit("The -ppr vertex %d is outside the valid range [1, %d].\n", params->ppr, mat->nrows);
+    pr = gk_fsmalloc(mat->nrows, 0.0, "main: pr");
+    pr[params->ppr-1] = 1.0;
+
+    niter = gk_rw_PageRank(mat, params->lamda, params->eps, params->niter, pr);
+    printf("ppr: %d; niters: %zd\n", params->ppr, niter);
+
+    /* output the computed pr scores */
+    fpout = gk_fopen(params->outfile, "w", "main: outfile");
+    for (i=0; i<mat->nrows; i++) 
+      fprintf(fpout, "%.4e\n", pr[i]);
+    gk_fclose(fpout);
+
+    gk_free((void **)&pr, LTERM);
+  }
+  else {
+    /* compute the standard pr, starting from the uniform vector 1/nrows */
+    int jmax;
+    float diff, maxdiff;
+    float *pr;
+
+    pr = gk_fsmalloc(mat->nrows, 1.0/mat->nrows, "main: pr");
+
+    niter = gk_rw_PageRank(mat, params->lamda, params->eps, params->niter, pr);
+    printf("pr; niters: %zd\n", niter);
+
+    /* per vertex: score, row length, largest |score difference| to an adjacent vertex, that vertex (1-based) */
+    fpout = gk_fopen(params->outfile, "w", "main: outfile");
+    for (i=0; i<mat->nrows; i++) {
+      for (jmax=i, maxdiff=0.0, j=mat->rowptr[i]; j<mat->rowptr[i+1]; j++) {
+        if ((diff = fabs(pr[i]-pr[mat->rowind[j]])) > maxdiff) {
+          maxdiff = diff;
+          jmax = mat->rowind[j];
+        }
+      }
+      fprintf(fpout, "%.4e %10zd %.4e %10d\n", pr[i], 
+          mat->rowptr[i+1]-mat->rowptr[i], maxdiff, jmax+1);
+    }
+    gk_fclose(fpout);
+
+    gk_free((void **)&pr, LTERM);
+  }
+
+  gk_csr_Free(&mat);
+
+  /* display some final stats */
+  print_final_info(params);
+}
+
+
+
+/*************************************************************************/
+/*! Prints the program banner, the input matrix dimensions and the run options to stdout */
+/*************************************************************************/
+void print_init_info(params_t *params, gk_csr_t *mat)
+{
+  printf("*******************************************************************************\n");
+  printf(" rw\n\n");  /* program name */
+  printf("Matrix Information ---------------------------------------------------------\n");
+  printf(" input file=%s, [%d, %d, %zd]\n", 
+      params->infile, mat->nrows, mat->ncols, mat->rowptr[mat->nrows]);
+
+  printf("\n");
+  printf("Options --------------------------------------------------------------------\n");
+  printf(" niter=%d, ntvs=%d, ppr=%d, lamda=%f, eps=%e\n",
+      params->niter, params->ntvs, params->ppr, params->lamda, params->eps);
+
+  printf("\n");
+  printf("Performing random walks... ----------------------------------------------\n");
+}
+
+
+/*************************************************************************/
+/*! Prints the maximum and current memory usage reported by GKlib; params is not used */
+/*************************************************************************/
+void print_final_info(params_t *params)
+{
+  ssize_t peak = (ssize_t) gk_GetMaxMemoryUsed(), live = (ssize_t) gk_GetCurMemoryUsed();
+  printf("\nMemory Usage Information -----------------------------------------------------\n");
+  printf("   Maximum memory used:              %10zd bytes\n", peak);
+  printf("   Current memory used:              %10zd bytes\n", live);
+  printf("********************************************************************************\n");
+}
+
+
+/*************************************************************************/
+/*! Parses the command line into a newly allocated params_t; invalid usage ends the program with a failure status */
+/*************************************************************************/
+params_t *parse_cmdline(int argc, char *argv[])
+{
+  int i;
+  int c, option_index;
+  params_t *params;
+
+  params = (params_t *)gk_malloc(sizeof(params_t), "parse_cmdline: params");
+
+  /* initialize the params data structure; -1 marks -ppr/-ntvs as not given */
+  params->niter     = 100;
+  params->ppr       = -1;
+  params->ntvs      = -1;
+  params->eps       = 1e-10;
+  params->lamda     = 0.80;
+  params->infile    = NULL;
+  params->outfile   = NULL;
+
+
+  /* Parse the command line arguments  */
+  while ((c = gk_getopt_long_only(argc, argv, "", long_options, &option_index)) != -1) {
+    switch (c) {
+      case CMD_NITER:
+        if (gk_optarg) params->niter = atoi(gk_optarg);
+        break;
+      case CMD_NTVS:
+        if (gk_optarg) params->ntvs = atoi(gk_optarg);
+        break;
+      case CMD_PPR:
+        if (gk_optarg) params->ppr = atoi(gk_optarg);
+        break;
+      case CMD_EPS:
+        if (gk_optarg) params->eps = atof(gk_optarg);
+        break;
+      case CMD_LAMDA:
+        if (gk_optarg) params->lamda = atof(gk_optarg);
+        break;
+
+      case CMD_HELP:
+        for (i=0; strlen(helpstr[i]) > 0; i++)
+          printf("%s\n", helpstr[i]);
+        exit(0);  /* asking for help is not an error */
+        break;
+      case '?':
+      default:
+        printf("Illegal command-line option(s)\nUse %s -help for a summary of the options.\n", argv[0]);
+        exit(EXIT_FAILURE);  /* an invalid invocation must not report success */
+    }
+  }
+
+  if (argc-gk_optind != 2) {  /* exactly two positional arguments: <graph-file> <out-file> */
+    printf("Unrecognized parameters.");
+    for (i=0; strlen(shorthelpstr[i]) > 0; i++)
+      printf("%s\n", shorthelpstr[i]);
+    exit(EXIT_FAILURE);
+  }
+
+  params->infile  = gk_strdup(argv[gk_optind++]);
+  params->outfile = gk_strdup(argv[gk_optind++]);
+
+  if (!gk_fexists(params->infile))
+    errexit("input file %s does not exist.\n", params->infile);
+
+  if (params->ppr != -1 && params->ntvs != -1)
+    errexit("Only one of the -ppr and -ntvs options can be specified.\n");
+
+  return params;
+}
+
diff --git a/test/splatt2svd.c b/test/splatt2svd.c
new file mode 100644
index 0000000..111d31c
--- /dev/null
+++ b/test/splatt2svd.c
@@ -0,0 +1,98 @@
+/*!
+\file 
+\brief A simple program to convert a tensor in coordinate format into an unfolded 
+       matrix
+
+\author George
+*/
+
+#include <GKlib.h>
+
+
+int main(int argc, char *argv[])
+{
+  size_t nnz, i, j, nI, nJ, nK, nrows, ncols;
+  int32_t *I, *J, *K, *rowind, *colind;
+  ssize_t *rowptr, *colptr;
+  float *V, *rowval, *colval;
+  /* Reads "k i j value" records (1-based indices), unfolds them into an (nK*nI) x nJ matrix and prints it column by column */
+  if (argc != 2) 
+    errexit("Usage %s <infile> [%d]\n", argv[0], argc);
+
+  if (!gk_fexists(argv[1]))
+    errexit("File %s does not exist.\n", argv[1]);
+  /* nnz receives the first statistic of gk_getfilestats() (presumably the line count -- confirm) */
+  gk_getfilestats(argv[1], &nnz, NULL, NULL, NULL);
+  I = gk_i32malloc(nnz, "I");
+  J = gk_i32malloc(nnz, "J");
+  K = gk_i32malloc(nnz, "K");
+  V = gk_fmalloc(nnz, "V");
+  fprintf(stderr, "Input nnz: %zu\n", nnz);
+  FILE *fpin = gk_fopen(argv[1], "r", "infile");
+  for (i=0; i<nnz; i++) {
+    if (4 != fscanf(fpin, "%d %d %d %f", K+i, I+i, J+i, V+i))
+      errexit("Failed to read 4 values in line %zu\n", i);
+    if (K[i] < 1 || I[i] < 1 || J[i] < 1)  /* would turn negative below and index out of bounds */
+      errexit("Non-positive index in line %zu\n", i);
+    K[i]--; I[i]--; J[i]--;
+  }
+  gk_fclose(fpin);
+  /* each mode size is the largest 0-based index plus one */
+  nI = gk_i32max(nnz, I, 1)+1;
+  nJ = gk_i32max(nnz, J, 1)+1;
+  nK = gk_i32max(nnz, K, 1)+1;
+
+  fprintf(stderr, "nI: %zu, nJ: %zu, nK: %zu\n", nI, nJ, nK);
+  /* row-wise form of the unfolded matrix: row id = k*nI + i, column id = j */
+  nrows = nK*nI;
+  ncols = nJ;
+  rowptr = gk_zsmalloc(nrows+1, 0, "rowptr");
+  for (i=0; i<nnz; i++) 
+    rowptr[K[i]*nI+I[i]]++;
+  MAKECSR(i, nrows, rowptr);
+
+  rowind = gk_i32malloc(nnz, "rowind");
+  rowval = gk_fmalloc(nnz, "rowval");
+  for (i=0; i<nnz; i++) {
+    rowind[rowptr[K[i]*nI+I[i]]] = J[i];
+    rowval[rowptr[K[i]*nI+I[i]]] = V[i];
+    rowptr[K[i]*nI+I[i]]++;
+  }
+  SHIFTCSR(i, nrows, rowptr);
+
+  gk_free((void **)&I, &J, &K, &V, LTERM);
+  /* column-wise form, built from the row-wise arrays with the same count/fill scheme */
+  colptr = gk_zsmalloc(ncols+1, 0, "colptr");
+  colind = gk_i32malloc(nnz, "colind");
+  colval = gk_fmalloc(nnz, "colval");
+  for (i=0; i<nrows; i++) {
+    for (j=rowptr[i]; j<rowptr[i+1]; j++)
+      colptr[rowind[j]]++;
+  }
+  MAKECSR(i, ncols, colptr);
+  for (i=0; i<nrows; i++) {
+    for (j=rowptr[i]; j<rowptr[i+1]; j++) {
+      colind[colptr[rowind[j]]] = i;
+      colval[colptr[rowind[j]]] = rowval[j];
+      colptr[rowind[j]]++;
+    }
+  }
+  SHIFTCSR(i, ncols, colptr);
+
+  /* sanity check: report adjacent entries of a column that carry the same row index */
+  for (i=0; i<ncols; i++) {
+    for (j=colptr[i]+1; j<colptr[i+1]; j++) {
+      if (colind[j-1] == colind[j])
+        fprintf(stderr, "Duplicate row indices: %d %d %d\n", (int)i, colind[j], colind[j-1]);
+    }
+  }
+  /* output: a "nrows ncols nnz" header, then per column its entry count and its (row, value) pairs */
+  printf("%zu %zu %zu\n", nrows, ncols, nnz);
+  for (i=0; i<ncols; i++) {
+    printf("%zd\n", colptr[i+1]-colptr[i]);
+    for (j=colptr[i]; j<colptr[i+1]; j++)
+      printf("%d %.3f\n", colind[j], colval[j]);
+  }
+  gk_free((void **)&rowptr, &rowind, &rowval, &colptr, &colind, &colval, LTERM);
+}
+
diff --git a/test/strings.c b/test/strings.c
new file mode 100644
index 0000000..b241d3f
--- /dev/null
+++ b/test/strings.c
@@ -0,0 +1,82 @@
+/*!
+\file strings.c
+\brief Testing module for the string functions in GKlib
+
+\date Started 3/5/2007
+\author George
+\version\verbatim $Id: strings.c 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
+*/
+
+#include <GKlib.h>
+
+
+/*************************************************************************/
+/*! Testing module for gk_strstr_replace(): prints "<return code>, <new string>." for every case in the table */
+/*************************************************************************/
+void test_strstr_replace()
+{
+  static struct {
+    char *str, *pattern, *replacement, *options;
+  } cases[] = {
+    /* single-character pattern, empty options string */
+    {"This is a simple string",
+     "s", "S", ""},
+
+    /* same pattern, options "g" */
+    {"This is a simple string",
+     "s", "S", "g"},
+
+    /* input with both upper- and lower-case s, options "g" */
+    {"This is a simple SS & ss string",
+     "s", "T", "g"},
+
+    /* same input and pattern, options "ig" */
+    {"This is a simple SS & ss string",
+     "s", "T", "ig"},
+
+    /* \b and \w classes with one capture group referenced as $1 */
+    {"This is a simple SS & ss string",
+     "\\b\\w(\\w+)\\w\\b", "$1", "ig"},
+
+    /* whole-word pattern replaced by a fixed string */
+    {"This is a simple SS & ss string",
+     "\\b\\w+\\b", "word", "ig"},
+
+    /* three capture groups recombined in the replacement */
+    {"http://www.cs.umn.edu/This-is-something-T12323?pp=20&page=4",
+     "(http://www\\.cs\\.umn\\.edu/)(.*)-T(\\d+)", "$1$2-P$3", "g"},
+
+    /* digit runs prefixed through $1 */
+    {"http://www.cs.umn.edu/This-is-something-T12323?pp=20&page=4",
+     "(\\d+)", "number:$1", "ig"},
+
+    /* captured prefix wrapped in brackets */
+    {"http://www.cs.umn.edu/This-is-something-T12323?pp=20&page=4",
+     "(http://www\\.cs\\.umn\\.edu/)", "[$1]", "g"}
+  };
+  size_t k;
+  char *new_str;
+  int rc;
+
+  for (k=0; k<sizeof(cases)/sizeof(cases[0]); k++) {
+    rc = gk_strstr_replace(cases[k].str, cases[k].pattern, cases[k].replacement, cases[k].options, &new_str);
+    printf("%d, %s.\n", rc, new_str);
+    gk_free((void **)&new_str, LTERM);
+  }
+}
+
+
+
+int main()
+{
+  test_strstr_replace();
+  /* the block below is disabled code that would print 1000 RandomInRange(3) values */
+/*
+  {
+  int i;
+  for (i=0; i<1000; i++)
+    printf("%d\n", RandomInRange(3));
+  }
+*/
+}
+
diff --git a/timers.c b/timers.c
new file mode 100644
index 0000000..bb8f296
--- /dev/null
+++ b/timers.c
@@ -0,0 +1,52 @@
+/*!
+\file  timers.c
+\brief Various timing functions 
+
+\date   Started 4/12/2007
+\author George
+\version\verbatim $Id: timers.c 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
+*/
+
+
+#include <GKlib.h>
+
+
+
+
+/*************************************************************************
+* Returns the wall-clock time in seconds: gettimeofday() (microsecond
+* field included) when __GNUC__ is defined, time(NULL) otherwise.
+**************************************************************************/
+double gk_WClockSeconds(void)
+{
+#if !defined(__GNUC__)
+  return (double)time(NULL);
+#else
+  struct timeval now;
+
+  gettimeofday(&now, NULL);
+  return (double)now.tv_sec + (double).000001*now.tv_usec;
+#endif
+}
+
+
+/*************************************************************************
+* Returns the CPU time consumed so far, in seconds: clock() on WIN32/MinGW,
+* otherwise the user plus system time reported by getrusage(RUSAGE_SELF).
+**************************************************************************/
+double gk_CPUSeconds(void)
+{
+#if defined(__OPENMPXXXX__)   /* omp_get_wtime() is compiled in only when this macro is defined */
+  return omp_get_wtime();
+#elif defined(WIN32) || defined(__MINGW32__)
+  return((double) clock()/CLOCKS_PER_SEC);
+#else
+  struct rusage usage;
+
+  /* whole seconds first, then the microsecond fields scaled to seconds */
+  getrusage(RUSAGE_SELF, &usage);
+  return ((usage.ru_utime.tv_sec + usage.ru_stime.tv_sec) +
+          1.0e-6*(usage.ru_utime.tv_usec + usage.ru_stime.tv_usec));
+#endif
+}
+
diff --git a/tokenizer.c b/tokenizer.c
new file mode 100644
index 0000000..5efd262
--- /dev/null
+++ b/tokenizer.c
@@ -0,0 +1,77 @@
+/*!
+\file  tokenizer.c
+\brief String tokenization routines
+
+This file contains various routines for splitting an input string into
+tokens and returning them in form of a list. The goal is to mimic perl's 
+split function.
+
+\date   Started 11/23/04
+\author George
+\version\verbatim $Id: tokenizer.c 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
+*/
+
+
+#include <GKlib.h>
+
+
+/************************************************************************
+* Splits str at any character found in delim. A private copy of str is
+* stored in tokens->strbuf and the entries of tokens->list point into it;
+* runs of delimiters are skipped, so no empty tokens are produced.
+*************************************************************************/
+void gk_strtokenize(char *str, char *delim, gk_Tokens_t *tokens)
+{
+  int pos, count, len;
+  char *buf;
+
+  len = strlen(str);
+  buf = tokens->strbuf = gk_strdup(str);
+
+  /* pass 1: count the tokens */
+  count = 0;
+  pos   = 0;
+  while (pos < len) {
+    /* skip a run of delimiters */
+    for (; pos<len && strchr(delim, buf[pos]); pos++)
+      ;
+    if (pos >= len)
+      break;
+
+    count++;
+
+    /* skip the token itself */
+    for (; pos<len && !strchr(delim, buf[pos]); pos++)
+      ;
+  }
+
+  tokens->ntoks = count;
+  tokens->list  = (char **)gk_malloc(count*sizeof(char *), "strtokenize: tokens->list");
+
+  /* pass 2: terminate each token in place and record where it starts */
+  count = 0;
+  pos   = 0;
+  while (pos < len) {
+    /* every delimiter becomes a string terminator */
+    for (; pos<len && strchr(delim, buf[pos]); pos++)
+      buf[pos] = '\0';
+    if (pos >= len)
+      break;
+
+    tokens->list[count++] = buf+pos;
+
+    /* run to the end of the token */
+    for (; pos<len && !strchr(delim, buf[pos]); pos++)
+      ;
+  }
+}
+
+
+/************************************************************************
+* Releases the token list and the string buffer held by a gk_Tokens_t
+*************************************************************************/
+void gk_freetokenslist(gk_Tokens_t *tokens)
+{
+  gk_free((void **)&tokens->list, &tokens->strbuf, LTERM);
+}
+
diff --git a/win32/adapt.c b/win32/adapt.c
new file mode 100644
index 0000000..546857c
--- /dev/null
+++ b/win32/adapt.c
@@ -0,0 +1,11 @@
+/*
+\file  win32/adapt.c
+\brief Implementation of Win32 adaptation of libc functions
+*/
+
+#include "adapt.h"
+
+pid_t getpid(void)  /* Win32 stand-in for the POSIX getpid() */
+{
+  return GetCurrentProcessId();  /* id of the calling process, per the Win32 API */
+}
diff --git a/win32/adapt.h b/win32/adapt.h
new file mode 100644
index 0000000..35e60ed
--- /dev/null
+++ b/win32/adapt.h
@@ -0,0 +1,14 @@
+/*
+\file  win32/adapt.h
+\brief Declaration of Win32 adaptation of POSIX functions and types
+*/
+#ifndef _WIN32_ADAPT_H_
+#define _WIN32_ADAPT_H_
+
+#include <windows.h>
+
+typedef DWORD pid_t;   /* process-id type for Win32 builds, declared as a DWORD */
+
+pid_t getpid(void);    /* declaration of the getpid() replacement */
+
+#endif  /* _WIN32_ADAPT_H_ */