From f0eba24f897981c6cefb439be840440bbc217c3b Mon Sep 17 00:00:00 2001
From: lib <noreply@acem.ece.illinois.edu>
Date: Thu, 22 Jun 2023 15:57:42 -0500
Subject: [PATCH] Initial commit

---
 CMakeLists.txt              |    31 +
 DEPENDENCIES                |     0
 GKlib.h                     |    85 +
 GKlibSystem.cmake           |   152 +
 LICENSE.txt                 |    18 +
 Makefile                    |    87 +
 README.md                   |    54 +
 SETUP                       |    12 +
 b64.c                       |    95 +
 blas.c                      |    37 +
 cache.c                     |   126 +
 conf/check_thread_storage.c |     5 +
 csr.c                       |  3378 +++++++++++
 error.c                     |   214 +
 evaluate.c                  |   132 +
 fkvkselect.c                |   142 +
 fs.c                        |   225 +
 getopt.c                    |   855 +++
 gk_arch.h                   |    70 +
 gk_defs.h                   |    87 +
 gk_externs.h                |    25 +
 gk_getopt.h                 |    64 +
 gk_macros.h                 |   169 +
 gk_mkblas.h                 |   203 +
 gk_mkmemory.h               |   142 +
 gk_mkpqueue.h               |   440 ++
 gk_mkpqueue2.h              |   215 +
 gk_mkrandom.h               |   123 +
 gk_mksort.h                 |   271 +
 gk_mkutils.h                |    40 +
 gk_ms_inttypes.h            |   301 +
 gk_ms_stat.h                |    22 +
 gk_ms_stdint.h              |   222 +
 gk_proto.h                  |   426 ++
 gk_struct.h                 |   296 +
 gk_types.h                  |    38 +
 gk_util.c                   |   107 +
 gkregex.c                   | 10704 ++++++++++++++++++++++++++++++++++
 gkregex.h                   |   556 ++
 graph.c                     |  1940 ++++++
 htable.c                    |   247 +
 io.c                        |   681 +++
 itemsets.c                  |   210 +
 mcore.c                     |   393 ++
 memory.c                    |   307 +
 pqueue.c                    |    25 +
 random.c                    |   136 +
 rw.c                        |   103 +
 scripts/gexpand.pl          |    53 +
 seq.c                       |   174 +
 sort.c                      |   437 ++
 string.c                    |   530 ++
 test/CMakeLists.txt         |    19 +
 test/cmpnbrs.c              |   301 +
 test/csrcnv.c               |   397 ++
 test/fis.c                  |   286 +
 test/gkgraph.c              |   845 +++
 test/gksort.c               |   346 ++
 test/gkuniq.c               |   268 +
 test/grKx.c                 |   256 +
 test/m2mnbrs.c              |   304 +
 test/rw.c                   |   306 +
 test/splatt2svd.c           |    98 +
 test/strings.c              |    82 +
 timers.c                    |    52 +
 tokenizer.c                 |    77 +
 win32/adapt.c               |    11 +
 win32/adapt.h               |    14 +
 68 files changed, 29067 insertions(+)
 create mode 100644 CMakeLists.txt
 create mode 100644 DEPENDENCIES
 create mode 100644 GKlib.h
 create mode 100644 GKlibSystem.cmake
 create mode 100644 LICENSE.txt
 create mode 100644 Makefile
 create mode 100644 README.md
 create mode 100755 SETUP
 create mode 100644 b64.c
 create mode 100644 blas.c
 create mode 100644 cache.c
 create mode 100644 conf/check_thread_storage.c
 create mode 100644 csr.c
 create mode 100644 error.c
 create mode 100644 evaluate.c
 create mode 100644 fkvkselect.c
 create mode 100644 fs.c
 create mode 100644 getopt.c
 create mode 100644 gk_arch.h
 create mode 100644 gk_defs.h
 create mode 100644 gk_externs.h
 create mode 100644 gk_getopt.h
 create mode 100644 gk_macros.h
 create mode 100644 gk_mkblas.h
 create mode 100644 gk_mkmemory.h
 create mode 100644 gk_mkpqueue.h
 create mode 100644 gk_mkpqueue2.h
 create mode 100644 gk_mkrandom.h
 create mode 100644 gk_mksort.h
 create mode 100644 gk_mkutils.h
 create mode 100644 gk_ms_inttypes.h
 create mode 100644 gk_ms_stat.h
 create mode 100644 gk_ms_stdint.h
 create mode 100644 gk_proto.h
 create mode 100644 gk_struct.h
 create mode 100644 gk_types.h
 create mode 100644 gk_util.c
 create mode 100644 gkregex.c
 create mode 100644 gkregex.h
 create mode 100644 graph.c
 create mode 100644 htable.c
 create mode 100644 io.c
 create mode 100644 itemsets.c
 create mode 100644 mcore.c
 create mode 100644 memory.c
 create mode 100644 pqueue.c
 create mode 100644 random.c
 create mode 100644 rw.c
 create mode 100644 scripts/gexpand.pl
 create mode 100644 seq.c
 create mode 100644 sort.c
 create mode 100644 string.c
 create mode 100644 test/CMakeLists.txt
 create mode 100644 test/cmpnbrs.c
 create mode 100644 test/csrcnv.c
 create mode 100644 test/fis.c
 create mode 100644 test/gkgraph.c
 create mode 100644 test/gksort.c
 create mode 100644 test/gkuniq.c
 create mode 100644 test/grKx.c
 create mode 100644 test/m2mnbrs.c
 create mode 100644 test/rw.c
 create mode 100644 test/splatt2svd.c
 create mode 100644 test/strings.c
 create mode 100644 timers.c
 create mode 100644 tokenizer.c
 create mode 100644 win32/adapt.c
 create mode 100644 win32/adapt.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..9cd1b4b
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,31 @@
+cmake_minimum_required(VERSION 2.8)
+project(GKlib C)
+
+option(BUILD_SHARED_LIBS "Build shared libraries (.dll/.so) instead of static ones (.lib/.a)" OFF)
+
+# Absolute path of this directory; GKlibSystem.cmake globs the library
+# sources and headers relative to it.
+get_filename_component(GKLIB_PATH "." ABSOLUTE)
+include(GKlibSystem.cmake)
+
+include_directories(".")
+if(MSVC)  # MSVC additionally compiles the compatibility sources kept in win32/
+  include_directories("win32")
+  file(GLOB win32_sources "${GKLIB_PATH}/win32/*.c")
+else()
+  set(win32_sources "")
+endif()
+
+add_library(GKlib ${GKlib_sources} ${win32_sources})
+
+if(UNIX)
+  target_link_libraries(GKlib m)
+endif()
+
+include_directories("test")
+add_subdirectory("test")
+
+install(TARGETS GKlib
+  ARCHIVE DESTINATION lib/${LINSTALL_PATH}
+  LIBRARY DESTINATION lib/${LINSTALL_PATH})
+install(FILES ${GKlib_includes} DESTINATION include/${HINSTALL_PATH})
diff --git a/DEPENDENCIES b/DEPENDENCIES
new file mode 100644
index 0000000..e69de29
diff --git a/GKlib.h b/GKlib.h
new file mode 100644
index 0000000..9278fe4
--- /dev/null
+++ b/GKlib.h
@@ -0,0 +1,85 @@
+/*
+ * GKlib.h
+ * 
+ * George's library of most frequently used routines
+ *
+ * $Id: GKlib.h 14866 2013-08-03 16:40:04Z karypis $
+ *
+ */
+
+#ifndef _GKLIB_H_
+#define _GKLIB_H_ 1
+
+#define GKMSPACE
+
+#if defined(_MSC_VER)
+#define __MSC__
+#endif
+#if defined(__ICC)
+#define __ICC__
+#endif
+
+
+#include "gk_arch.h" /*!< This should be here, prior to the includes */
+
+
+/*************************************************************************
+* Header file inclusion section
+**************************************************************************/
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <memory.h>
+#include <errno.h>
+#include <ctype.h>
+#include <math.h>
+#include <float.h>
+#include <time.h>
+#include <string.h>
+#include <limits.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <assert.h>
+#include <sys/stat.h>
+
+#if defined(__WITHPCRE__)
+  #include <pcreposix.h>
+#else
+  #if defined(USE_GKREGEX)
+    #include "gkregex.h"
+  #else
+    #include <regex.h>
+  #endif /* defined(USE_GKREGEX) */
+#endif /* defined(__WITHPCRE__) */
+
+
+
+#if defined(__OPENMP__) 
+#include <omp.h>
+#endif
+
+
+
+
+#include <gk_types.h>
+#include <gk_struct.h>
+#include <gk_externs.h>
+#include <gk_defs.h>
+#include <gk_macros.h>
+#include <gk_getopt.h>
+
+#include <gk_mksort.h>
+#include <gk_mkblas.h>
+#include <gk_mkmemory.h>
+#include <gk_mkpqueue.h>
+#include <gk_mkpqueue2.h>
+#include <gk_mkrandom.h>
+#include <gk_mkutils.h>
+
+#include <gk_proto.h>
+
+
+#endif  /* GKlib.h */
+
+
diff --git a/GKlibSystem.cmake b/GKlibSystem.cmake
new file mode 100644
index 0000000..31a1cf1
--- /dev/null
+++ b/GKlibSystem.cmake
@@ -0,0 +1,152 @@
+# Helper modules that provide check_function_exists() and check_include_file().
+include(CheckFunctionExists)
+include(CheckIncludeFile)
+
+# User-settable build options; all default to OFF and are turned into compiler flags further below.
+option(GDB "enable use of GDB" OFF)
+option(ASSERT "turn asserts on" OFF)
+option(ASSERT2 "additional assertions" OFF)
+option(DEBUG "add debugging support" OFF)
+option(GPROF "add gprof support" OFF)
+option(VALGRIND "add valgrind support" OFF)
+option(OPENMP "enable OpenMP support" OFF)
+option(PCRE "enable PCRE support" OFF)
+option(GKREGEX "enable GKREGEX support" OFF)
+option(GKRAND "enable GKRAND support" OFF)
+option(NO_X86 "enable NO_X86 support" OFF)
+
+
+# Platform definitions go into GKlib_COPTIONS; GKlib_COPTS holds optimisation/debug switches.
+if(MSVC)
+  set(GKlib_COPTS "/Ox")
+  set(GKlib_COPTIONS "-DWIN32 -DMSC -D_CRT_SECURE_NO_DEPRECATE -DUSE_GKREGEX")
+elseif(MINGW)
+  set(GKlib_COPTIONS "-DUSE_GKREGEX")
+else()
+  set(GKlib_COPTIONS "-DLINUX -D_FILE_OFFSET_BITS=64")
+endif()
+if(CYGWIN)
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -DCYGWIN")
+endif()
+if(CMAKE_COMPILER_IS_GNUCC)
+  # GCC opts.
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -std=c99 -fno-strict-aliasing")
+  if(VALGRIND)
+    set(GKlib_COPTIONS "${GKlib_COPTIONS} -march=x86-64 -mtune=generic")
+  else()
+    # -march=native is not a valid flag on PPC:
+    if(CMAKE_SYSTEM_PROCESSOR MATCHES "power|ppc|powerpc|ppc64|powerpc64" OR (APPLE AND CMAKE_OSX_ARCHITECTURES MATCHES "ppc|ppc64"))
+      set(GKlib_COPTIONS "${GKlib_COPTIONS} -mtune=native")
+    else()
+      set(GKlib_COPTIONS "${GKlib_COPTIONS} -march=native")
+    endif()
+  endif()
+  if(NOT MINGW)
+    set(GKlib_COPTIONS "${GKlib_COPTIONS} -fPIC")
+  endif()
+  # GCC warnings.
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -Werror -Wall -pedantic -Wno-unused-function -Wno-unused-but-set-variable -Wno-unused-variable -Wno-unknown-pragmas -Wno-unused-label")
+elseif(CMAKE_C_COMPILER_ID MATCHES "Sun")
+  # Sun insists on -xc99.
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -xc99")
+endif()
+
+# Intel compiler (variable named without ${} so an empty compiler id cannot break the if()).
+if(CMAKE_C_COMPILER_ID MATCHES "Intel")
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -xHost -std=c99")
+endif()
+
+# Locate OpenMP only when the user asked for it.
+if(OPENMP)
+  include(FindOpenMP)
+  if(NOT OPENMP_FOUND)
+    message(WARNING "OpenMP was requested but support was not found")
+  else()
+    set(GKlib_COPTIONS "${GKlib_COPTIONS} -D__OPENMP__ ${OpenMP_C_FLAGS}")
+  endif()
+endif()
+
+# Forward the NO_X86 setting to the C sources as a preprocessor definition.
+if(NO_X86)
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -DNO_X86=${NO_X86}")
+endif()
+
+# Debug/optimisation switches. -O3 is appended so earlier GKlib_COPTS content survives.
+if(GDB)
+  set(GKlib_COPTS "${GKlib_COPTS} -g")
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -Werror")
+elseif(NOT MSVC)
+  set(GKlib_COPTS "${GKlib_COPTS} -O3")
+endif()
+
+# MSVC is skipped above because -O3 is not one of its options. DEBUG and GPROF replace GKlib_COPTS.
+if(DEBUG)
+  set(GKlib_COPTS "-g")
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -DDEBUG")
+endif()
+
+if(GPROF)
+  set(GKlib_COPTS "-pg")
+endif()
+
+if(NOT ASSERT)
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -DNDEBUG")
+endif()
+
+if(NOT ASSERT2)
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -DNDEBUG2")
+endif()
+
+
+# Turn the optional-feature switches into the macros the C sources test for.
+if(PCRE)
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -D__WITHPCRE__")
+endif()
+
+if(GKREGEX)
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -DUSE_GKREGEX")
+endif()
+
+if(GKRAND)
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -DUSE_GKRAND")
+endif()
+
+
+# Probe the platform for execinfo.h and getline().
+check_include_file(execinfo.h HAVE_EXECINFO_H)
+check_function_exists(getline HAVE_GETLINE)
+
+# Every successful probe becomes a -D<NAME> definition, in probe order.
+foreach(probe HAVE_EXECINFO_H HAVE_GETLINE)
+  if(${probe})
+    set(GKlib_COPTIONS "${GKlib_COPTIONS} -D${probe}")
+  endif()
+endforeach()
+
+
+# Custom check for TLS: MSVC spells it __declspec(thread); every other compiler is probed for __thread.
+if(MSVC)
+  set(GKlib_COPTIONS "${GKlib_COPTIONS} -D__thread=__declspec(thread)")
+else()
+  # Run the probe only while no result is known yet (try_compile caches its result).
+  if(NOT DEFINED HAVE_THREADLOCALSTORAGE)
+    try_compile(HAVE_THREADLOCALSTORAGE
+      ${CMAKE_BINARY_DIR}
+      ${GKLIB_PATH}/conf/check_thread_storage.c)
+    if(HAVE_THREADLOCALSTORAGE)
+      message(STATUS "checking for thread-local storage - found")
+    else()
+      message(STATUS "checking for thread-local storage - not found")
+    endif()
+  endif()
+  if(NOT HAVE_THREADLOCALSTORAGE)
+    set(GKlib_COPTIONS "${GKlib_COPTIONS} -D__thread=")
+  endif()
+endif()
+
+# Publish the accumulated definitions and switches through the global C flags.
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${GKlib_COPTIONS} ${GKlib_COPTS}")
+
+# Collect every C source and header that sits directly in GKLIB_PATH.
+file(GLOB GKlib_sources "${GKLIB_PATH}/*.c")
+file(GLOB GKlib_includes "${GKLIB_PATH}/*.h")
diff --git a/LICENSE.txt b/LICENSE.txt
new file mode 100644
index 0000000..b61ca6f
--- /dev/null
+++ b/LICENSE.txt
@@ -0,0 +1,18 @@
+
+Copyright & License Notice
+---------------------------
+
+Copyright 1995-2018, Regents of the University of Minnesota
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 
+implied. See the License for the specific language governing 
+permissions and limitations under the License.
+
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..6ac97b9
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,87 @@
+# Configuration options; override on the command line (e.g. 'make config cc=icc'). A value of not-set is not forwarded to cmake.
+cc       = gcc
+prefix   = ~/local
+openmp   = not-set
+gdb      = not-set
+assert   = not-set
+assert2  = not-set
+debug    = not-set
+gprof    = not-set
+valgrind = not-set
+pcre     = not-set
+gkregex  = not-set
+gkrand   = not-set
+
+
+# Everything is proxied to a cmake build tree under build/<system>-<cpu>.
+cputype = $(shell uname -m | sed "s/\\ /_/g")
+systype = $(shell uname -s)
+
+BUILDDIR = build/$(systype)-$(cputype)
+
+# Translate every option that was set into a -D definition for cmake.
+CONFIG_FLAGS = -DCMAKE_VERBOSE_MAKEFILE=1
+ifneq ($(gdb), not-set)
+    CONFIG_FLAGS += -DGDB=$(gdb)
+endif
+ifneq ($(assert), not-set)
+    CONFIG_FLAGS += -DASSERT=$(assert)
+endif
+ifneq ($(assert2), not-set)
+    CONFIG_FLAGS += -DASSERT2=$(assert2)
+endif
+ifneq ($(debug), not-set)
+    CONFIG_FLAGS += -DDEBUG=$(debug)
+endif
+ifneq ($(gprof), not-set)
+    CONFIG_FLAGS += -DGPROF=$(gprof)
+endif
+ifneq ($(valgrind), not-set)
+    CONFIG_FLAGS += -DVALGRIND=$(valgrind)
+endif
+ifneq ($(openmp), not-set)
+    CONFIG_FLAGS += -DOPENMP=$(openmp)
+endif
+ifneq ($(pcre), not-set)
+    CONFIG_FLAGS += -DPCRE=$(pcre)
+endif
+ifneq ($(gkregex), not-set)
+    CONFIG_FLAGS += -DGKREGEX=$(gkregex)
+endif
+ifneq ($(gkrand), not-set)
+    CONFIG_FLAGS += -DGKRAND=$(gkrand)
+endif
+ifneq ($(prefix), not-set)
+    CONFIG_FLAGS += -DCMAKE_INSTALL_PREFIX=$(prefix)
+endif
+ifneq ($(cc), not-set)
+    CONFIG_FLAGS += -DCMAKE_C_COMPILER=$(cc)
+endif
+ifneq ($(cputype), x86_64)
+    CONFIG_FLAGS += -DNO_X86=$(cputype)
+endif
+
+define run-config
+mkdir -p $(BUILDDIR)
+cd $(BUILDDIR) && cmake $(CURDIR) $(CONFIG_FLAGS)
+endef
+
+all clean install: $(BUILDDIR)
+	$(MAKE) -C $(BUILDDIR) $@
+
+uninstall:
+	 xargs rm < $(BUILDDIR)/install_manifest.txt
+
+$(BUILDDIR):
+	$(run-config)
+
+config: distclean
+	$(run-config)
+
+distclean:
+	rm -rf $(BUILDDIR)
+
+remake:
+	find . -name CMakeLists.txt -exec touch {} ';'
+
+.PHONY: config distclean all clean install uninstall remake
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..f94eeea
--- /dev/null
+++ b/README.md
@@ -0,0 +1,54 @@
+# GKlib
+A library of various helper routines and frameworks used by many of the lab's software packages.
+
+## Build requirements
+ - CMake 2.8, found at http://www.cmake.org/, as well as GNU make. 
+
+Assuming that the above are available, two commands should suffice to 
+build the software:
+```
+make config 
+make
+```
+
+## Configuring the build
+The build is primarily configured by passing options to `make config`. For example:
+```
+make config cc=icc
+```
+
+would configure it to be built using icc.
+
+Configuration options are:
+```
+cc=[compiler]     - The C compiler to use [default: gcc]
+prefix=[PATH]     - Set the installation prefix [default: ~/local]
+openmp=set        - To build a version with OpenMP support
+```
+
+
+## Building and installing
+To build and install, run the following
+```
+make
+make install
+```
+
+By default, the library file, header file, and binaries will be installed in
+```
+~/local/lib
+~/local/include
+~/local/bin
+```
+
+## Other make commands
+    make uninstall 
+         Removes all files installed by 'make install'.
+   
+    make clean 
+         Removes all object files but retains the configuration options.
+   
+    make distclean 
+         Performs clean and completely removes the build directory.
+
+
diff --git a/SETUP b/SETUP
new file mode 100755
index 0000000..a1d187d
--- /dev/null
+++ b/SETUP
@@ -0,0 +1,12 @@
+#!/usr/bin/env bash
+
+if [ -z "$1" ]
+then
+   printf "Usage: setup.sh install_directory [dependencies].\n" 1>&2
+   exit 1
+fi
+
+cd "$(dirname "$0")" || exit 1   # quoted so a path with spaces works; stop if the cd fails
+libname=$(basename "$(pwd)")
+make config shared=1 prefix="$1" || exit 1   # never install from a failed configuration
+make install
diff --git a/b64.c b/b64.c
new file mode 100644
index 0000000..afacd68
--- /dev/null
+++ b/b64.c
@@ -0,0 +1,95 @@
+/*! 
+\file  b64.c
+\brief This file contains some simple 8bit-to-6bit encoding/deconding routines
+
+Most of these routines are outdated and should be converted using glibc's equivalent
+routines.
+
+\date   Started 2/22/05
+\author George
+\version\verbatim $Id: b64.c 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
+
+\verbatim 
+$Copyright$ 
+$License$
+\endverbatim
+
+*/
+
+
+#include "GKlib.h"
+
+#define B64OFFSET       48      /* This is the '0' number */
+
+
+/******************************************************************************
+* Encode 3 '8-bit' binary bytes as 4 '6-bit' characters
+*******************************************************************************/
+void encodeblock(unsigned char *in, unsigned char *out)
+{
+  int k;
+
+  /* carve the 24 input bits into four 6-bit groups, most significant first */
+  out[0] = (in[0] >> 2);
+  out[1] = (((in[0] & 0x03) << 4) | (in[1] >> 4));
+  out[2] = (((in[1] & 0x0f) << 2) | (in[2] >> 6));
+  out[3] = (in[2] & 0x3f);
+
+  /* move every group up by B64OFFSET, i.e. into the range that starts at '0' */
+  for (k=0; k<4; k++)
+    out[k] += B64OFFSET;
+}
+
+/******************************************************************************
+* Decode 4 '6-bit' characters into 3 '8-bit' binary bytes
+*******************************************************************************/
+void decodeblock(unsigned char *in, unsigned char *out)
+{
+  /* Work on local copies: the encoded input is left intact, so the same
+     buffer can be decoded again and still give the same result. */
+  unsigned char c0 = in[0] - B64OFFSET, c1 = in[1] - B64OFFSET;
+  unsigned char c2 = in[2] - B64OFFSET, c3 = in[3] - B64OFFSET;
+
+  out[0] = (c0 << 2 | c1 >> 4);
+  out[1] = (c1 << 4 | c2 >> 2);
+  out[2] = (c2 << 6 | c3);
+}
+
+
+/******************************************************************************
+* This function encodes an input array of bytes into a base64 encoding. Memory
+* for the output array is assumed to have been allocated by the calling program
+* and be sufficiently large. The output string is NULL terminated.
+*******************************************************************************/
+void GKEncodeBase64(int nbytes, unsigned char *inbuffer, unsigned char *outbuffer)
+{
+  int src, dst=0;
+
+  if (nbytes%3 != 0)
+    gk_errexit(SIGERR, "GKEncodeBase64: Input buffer size should be a multiple of 3! (%d)\n", nbytes);
+
+  /* every 3 input bytes become 4 output characters */
+  for (src=0; src<nbytes; src+=3, dst+=4)
+    encodeblock(inbuffer+src, outbuffer+dst);
+
+  outbuffer[dst] = '\0';  /* terminate the encoded string */
+}
+
+
+
+/******************************************************************************
+* This function decodes an input array of base64 characters into their actual
+* 8-bit codes. Memory * for the output array is assumed to have been allocated 
+* by the calling program and be sufficiently large. The padding is discarded.
+*******************************************************************************/
+void GKDecodeBase64(int nbytes, unsigned char *inbuffer, unsigned char *outbuffer)
+{
+  int src, dst;
+
+  if (nbytes%4 != 0)
+    gk_errexit(SIGERR, "GKDecodeBase64: Input buffer size should be a multiple of 4! (%d)\n", nbytes);
+
+  for (src=0, dst=0; src<nbytes; src+=4, dst+=3)  /* 4 characters -> 3 bytes */
+    decodeblock(inbuffer+src, outbuffer+dst);
+}
+
diff --git a/blas.c b/blas.c
new file mode 100644
index 0000000..a0b95ca
--- /dev/null
+++ b/blas.c
@@ -0,0 +1,37 @@
+/*!
+\file blas.c
+\brief This file contains GKlib's implementation of BLAS-like routines
+
+The BLAS routines that are currently implemented are mostly level-one.
+They follow a naming convention of the type gk_[type][name], where
+[type] is a short tag for the scalar datatype: c, i, i32, i64, z, zu,
+f, d, and idx (see the GK_MKBLAS instantiations in this file).
+
+These routines are implemented using a generic macro template,
+which is used for code generation.
+
+\date   Started 9/28/95
+\author George
+\version\verbatim $Id: blas.c 14330 2013-05-18 12:15:15Z karypis $ \endverbatim
+*/
+
+#include <GKlib.h>
+
+
+
+/*************************************************************************/
+/*! Use the templates to generate BLAS routines for the scalar data types. Each line is GK_MKBLAS(prefix, element type, third type); NOTE(review): the third argument is presumably the result type -- confirm in gk_mkblas.h */
+/*************************************************************************/
+GK_MKBLAS(gk_c,   char,     int)
+GK_MKBLAS(gk_i,   int,      int)
+GK_MKBLAS(gk_i32, int32_t,  int32_t)
+GK_MKBLAS(gk_i64, int64_t,  int64_t)
+GK_MKBLAS(gk_z,   ssize_t,  ssize_t)
+GK_MKBLAS(gk_zu,  size_t,   size_t)
+GK_MKBLAS(gk_f,   float,    float)
+GK_MKBLAS(gk_d,   double,   double)
+GK_MKBLAS(gk_idx, gk_idx_t, gk_idx_t)
+
+
+
+
diff --git a/cache.c b/cache.c
new file mode 100644
index 0000000..932e36d
--- /dev/null
+++ b/cache.c
@@ -0,0 +1,126 @@
+/*!
+\file 
+\brief Functions dealing with simulating cache behavior for performance
+       modeling and analysis;
+
+\date Started 4/13/18
+\author George
+\author Copyright 1997-2011, Regents of the University of Minnesota 
+\version $Id: cache.c 21991 2018-04-16 03:08:12Z karypis $
+*/
+
+#include <GKlib.h>
+
+
+/*************************************************************************/
+/*! This function creates a cache 
+ */
+/*************************************************************************/
+gk_cache_t *gk_cacheCreate(uint32_t nway, uint32_t lnbits, size_t cnbits)
+{
+  gk_cache_t *cache;
+
+  cache = (gk_cache_t *)gk_malloc(sizeof(gk_cache_t), "gk_cacheCreate: cache");
+  memset(cache, 0, sizeof(gk_cache_t));  /* every field, counters included, starts at zero */
+
+  cache->nway   = nway;
+  cache->lnbits = lnbits;
+  cache->cnbits = cnbits;
+  cache->csize  = ((size_t)1)<<cnbits;  /* shift in size_t: an int 1 overflows once cnbits reaches 31 */
+  cache->cmask  = cache->csize-1;
+
+  cache->latimes = gk_ui64smalloc(cache->csize*nway, 0, "gk_cacheCreate: latimes");
+  cache->clines  = gk_zusmalloc(cache->csize*nway, 0, "gk_cacheCreate: clines");
+
+  return cache;
+}
+
+
+/*************************************************************************/
+/*! This function resets a cache 
+ */
+/*************************************************************************/
+void gk_cacheReset(gk_cache_t *cache)
+{
+  /* The clock is the denominator of gk_cacheGetHitRate(), so it restarts
+     together with the counters; otherwise the rate after a reset is understated. */
+  cache->clock   = 0;
+  cache->nhits   = 0;
+  cache->nmisses = 0;
+  gk_ui64set(cache->csize*cache->nway, 0, cache->latimes);
+  gk_zuset(cache->csize*cache->nway, 0, cache->clines);
+}
+
+
+/*************************************************************************/
+/*! This function destroys a cache.
+ */
+/*************************************************************************/
+void gk_cacheDestroy(gk_cache_t **r_cache)
+{
+  gk_cache_t *victim = *r_cache;
+
+  if (victim != NULL) {
+    /* release both per-slot arrays and the descriptor in one call */
+    gk_free((void **)&victim->clines, &victim->latimes, &victim, LTERM);
+
+    *r_cache = NULL;  /* the caller's handle must not dangle */
+  }
+}
+
+
+/*************************************************************************/
+/*! This function simulates a load(ptr) operation.
+ */
+/*************************************************************************/
+int gk_cacheLoad(gk_cache_t *cache, size_t addr)
+{
+  uint32_t way, nway=cache->nway;
+  size_t line, victim=0, *tags;
+  uint64_t *stamps;
+
+  /* shift out the low lnbits bits (presumably the offset within a cache line) */
+  line = addr>>(cache->lnbits);
+
+  /* NOTE(review): the set offset is not scaled by nway, so the slots of
+     neighbouring sets overlap -- confirm this is the intended organisation */
+  tags   = cache->clines  + (line&(cache->cmask));
+  stamps = cache->latimes + (line&(cache->cmask));
+
+  cache->clock++;
+
+  /* hit: refresh the slot's last-access time and finish */
+  for (way=0; way<nway; way++) {
+    if (tags[way] == line) {
+      cache->nhits++;
+      stamps[way] = cache->clock;
+      return 1;
+    }
+  }
+
+  /* miss: take the first empty slot, otherwise the least recently used one */
+  for (way=0; way<nway; way++) {
+    if (tags[way] == 0) {
+      victim = way;
+      break;
+    }
+    if (stamps[way] < stamps[victim])
+      victim = way;
+  }
+
+  cache->nmisses++;
+  tags[victim]   = line;
+  stamps[victim] = cache->clock;
+  return 1;
+}
+
+
+/*************************************************************************/
+/*! This function returns the cache's hitrate
+ */
+/*************************************************************************/
+double gk_cacheGetHitRate(gk_cache_t *cache)
+{
+  return (double)cache->nhits / (double)(cache->clock+1);  /* +1: a cache with no loads yields 0, not 0/0 */
+}
+
diff --git a/conf/check_thread_storage.c b/conf/check_thread_storage.c
new file mode 100644
index 0000000..e6e1e98
--- /dev/null
+++ b/conf/check_thread_storage.c
@@ -0,0 +1,5 @@
+extern __thread int x;  /* compile probe: fails if the compiler rejects __thread */
+
+int main(void) {
+  return 0;
+}
diff --git a/csr.c b/csr.c
new file mode 100644
index 0000000..7e92a0c
--- /dev/null
+++ b/csr.c
@@ -0,0 +1,3378 @@
+/*!
+ * \file 
+ *
+ * \brief Various routines with dealing with CSR matrices
+ *
+ * \author George Karypis
+ * \version\verbatim $Id: csr.c 21044 2017-05-24 22:50:32Z karypis $ \endverbatim
+ */
+
+#include <GKlib.h>
+
+#define OMPMINOPS       50000
+
+/*************************************************************************/
+/*! Allocates a CSR matrix object and initializes it via gk_csr_Init().
+    \returns the allocated matrix, with all of its fields zeroed.
+*/
+/**************************************************************************/
+gk_csr_t *gk_csr_Create()
+{
+  gk_csr_t *mat = (gk_csr_t *)gk_malloc(sizeof(gk_csr_t), "gk_csr_Create: mat");
+
+  if (mat != NULL)
+    gk_csr_Init(mat);
+
+  return mat;
+}
+
+
+/*************************************************************************/
+/*! Resets a matrix object to the empty state: every field is zeroed.
+    \param mat is the matrix to be initialized; it must not be NULL. */
+/*************************************************************************/
+void gk_csr_Init(gk_csr_t *mat)
+{
+  memset(mat, 0, sizeof(*mat));
+  mat->ncols = 0;
+  mat->nrows = 0;
+}
+
+
+/*************************************************************************/
+/*! Releases a matrix: first its arrays, then the matrix object itself.
+    \param mat points to the matrix pointer; nothing is done if *mat is NULL.
+*/
+/*************************************************************************/
+void gk_csr_Free(gk_csr_t **mat)
+{
+  if (*mat != NULL) {
+    gk_csr_FreeContents(*mat);
+    gk_free((void **)mat, LTERM);
+  }
+}
+
+
+/*************************************************************************/
+/*! Frees only the memory allocated for the matrix's different fields and
+    sets them to NULL. The matrix object itself and its scalar fields
+    (e.g., nrows, ncols) are left untouched.
+    \param mat is the matrix whose contents will be freed.
+*/
+/*************************************************************************/
+void gk_csr_FreeContents(gk_csr_t *mat)
+{
+  gk_free((void **)&mat->rowptr, &mat->rowind, &mat->rowval,
+      &mat->rowids, &mat->rlabels, &mat->rmap,
+      &mat->colptr, &mat->colind, &mat->colval,
+      &mat->colids, &mat->clabels, &mat->cmap,
+      &mat->rnorms, &mat->cnorms, &mat->rsums, &mat->csums,
+      &mat->rsizes, &mat->csizes, &mat->rvols, &mat->cvols,
+      &mat->rwgts, &mat->cwgts,
+      LTERM);  /* LTERM terminates the variable-length pointer list */
+}
+
+
+/*************************************************************************/
+/*! Returns a deep copy of a matrix; only the arrays present in it are copied.
+    \param mat is the matrix to be duplicated.
+    \returns the newly created copy of the matrix.
+*/
+/**************************************************************************/
+gk_csr_t *gk_csr_Dup(gk_csr_t *mat)
+{
+  gk_csr_t *dup = gk_csr_Create();
+
+  dup->nrows = mat->nrows;
+  dup->ncols = mat->ncols;
+
+  /* row-based structure: rowptr has nrows+1 entries, rowind/rowval have
+     rowptr[nrows] entries, and every other row array has nrows entries */
+  if (mat->rowptr != NULL)
+    dup->rowptr = gk_zcopy(mat->nrows+1, mat->rowptr,
+                      gk_zmalloc(mat->nrows+1, "gk_csr_Dup: rowptr"));
+  if (mat->rowind != NULL)
+    dup->rowind = gk_icopy(mat->rowptr[mat->nrows], mat->rowind,
+                      gk_imalloc(mat->rowptr[mat->nrows], "gk_csr_Dup: rowind"));
+  if (mat->rowval != NULL)
+    dup->rowval = gk_fcopy(mat->rowptr[mat->nrows], mat->rowval,
+                      gk_fmalloc(mat->rowptr[mat->nrows], "gk_csr_Dup: rowval"));
+  if (mat->rowids != NULL)
+    dup->rowids = gk_icopy(mat->nrows, mat->rowids,
+                      gk_imalloc(mat->nrows, "gk_csr_Dup: rowids"));
+  if (mat->rlabels != NULL)
+    dup->rlabels = gk_icopy(mat->nrows, mat->rlabels,
+                      gk_imalloc(mat->nrows, "gk_csr_Dup: rlabels"));
+  if (mat->rnorms != NULL)
+    dup->rnorms = gk_fcopy(mat->nrows, mat->rnorms,
+                      gk_fmalloc(mat->nrows, "gk_csr_Dup: rnorms"));
+  if (mat->rsums != NULL)
+    dup->rsums = gk_fcopy(mat->nrows, mat->rsums,
+                      gk_fmalloc(mat->nrows, "gk_csr_Dup: rsums"));
+  if (mat->rsizes != NULL)
+    dup->rsizes = gk_fcopy(mat->nrows, mat->rsizes,
+                      gk_fmalloc(mat->nrows, "gk_csr_Dup: rsizes"));
+  if (mat->rvols != NULL)
+    dup->rvols = gk_fcopy(mat->nrows, mat->rvols,
+                      gk_fmalloc(mat->nrows, "gk_csr_Dup: rvols"));
+  if (mat->rwgts != NULL)
+    dup->rwgts = gk_fcopy(mat->nrows, mat->rwgts,
+                      gk_fmalloc(mat->nrows, "gk_csr_Dup: rwgts"));
+
+  /* column-based structure, copied the same way; note that rmap and cmap
+     are not duplicated */
+  if (mat->colptr != NULL)
+    dup->colptr = gk_zcopy(mat->ncols+1, mat->colptr,
+                      gk_zmalloc(mat->ncols+1, "gk_csr_Dup: colptr"));
+  if (mat->colind != NULL)
+    dup->colind = gk_icopy(mat->colptr[mat->ncols], mat->colind,
+                      gk_imalloc(mat->colptr[mat->ncols], "gk_csr_Dup: colind"));
+  if (mat->colval != NULL)
+    dup->colval = gk_fcopy(mat->colptr[mat->ncols], mat->colval,
+                      gk_fmalloc(mat->colptr[mat->ncols], "gk_csr_Dup: colval"));
+  if (mat->colids != NULL)
+    dup->colids = gk_icopy(mat->ncols, mat->colids,
+                      gk_imalloc(mat->ncols, "gk_csr_Dup: colids"));
+  if (mat->clabels != NULL)
+    dup->clabels = gk_icopy(mat->ncols, mat->clabels,
+                      gk_imalloc(mat->ncols, "gk_csr_Dup: clabels"));
+  if (mat->cnorms != NULL)
+    dup->cnorms = gk_fcopy(mat->ncols, mat->cnorms,
+                      gk_fmalloc(mat->ncols, "gk_csr_Dup: cnorms"));
+  if (mat->csums != NULL)
+    dup->csums = gk_fcopy(mat->ncols, mat->csums,
+                      gk_fmalloc(mat->ncols, "gk_csr_Dup: csums"));
+  if (mat->csizes != NULL)
+    dup->csizes = gk_fcopy(mat->ncols, mat->csizes,
+                      gk_fmalloc(mat->ncols, "gk_csr_Dup: csizes"));
+  if (mat->cvols != NULL)
+    dup->cvols = gk_fcopy(mat->ncols, mat->cvols,
+                      gk_fmalloc(mat->ncols, "gk_csr_Dup: cvols"));
+  if (mat->cwgts != NULL)
+    dup->cwgts = gk_fcopy(mat->ncols, mat->cwgts,
+                      gk_fmalloc(mat->ncols, "gk_csr_Dup: cwgts"));
+
+  return dup;
+}
+
+
+/*************************************************************************/
+/*! Returns a submatrix containing a set of consecutive rows.
+    \param mat is the original matrix; it must have a row structure.
+    \param rstart is the starting row.
+    \param nrows is the number of rows from rstart to extract.
+    \returns the row structure of the newly created submatrix, or NULL if
+             the row range is invalid or mat has no row structure.
+*/
+/**************************************************************************/
+gk_csr_t *gk_csr_ExtractSubmatrix(gk_csr_t *mat, int rstart, int nrows)
+{
+  ssize_t i;
+  gk_csr_t *nmat;
+
+  /* reject bad ranges; nrows > mat->nrows-rstart cannot overflow like rstart+nrows */
+  if (mat->rowptr == NULL || rstart < 0 || nrows < 0 || nrows > mat->nrows-rstart)
+    return NULL;
+
+  nmat = gk_csr_Create();
+
+  nmat->nrows  = nrows;
+  nmat->ncols  = mat->ncols;
+
+  /* copy the row structure and rebase it so that it starts at 0 */
+  nmat->rowptr = gk_zcopy(nrows+1, mat->rowptr+rstart,
+                          gk_zmalloc(nrows+1, "gk_csr_ExtractSubmatrix: rowptr"));
+  for (i=nrows; i>=0; i--)  /* descending, so that rowptr[0] is zeroed last */
+    nmat->rowptr[i] -= nmat->rowptr[0];
+  ASSERT(nmat->rowptr[0] == 0);
+
+  if (mat->rowids)
+    nmat->rowids = gk_icopy(nrows, mat->rowids+rstart,
+                            gk_imalloc(nrows, "gk_csr_ExtractSubmatrix: rowids"));
+  if (mat->rnorms)
+    nmat->rnorms = gk_fcopy(nrows, mat->rnorms+rstart,
+                            gk_fmalloc(nrows, "gk_csr_ExtractSubmatrix: rnorms"));
+  if (mat->rsums)
+    nmat->rsums = gk_fcopy(nrows, mat->rsums+rstart,
+                            gk_fmalloc(nrows, "gk_csr_ExtractSubmatrix: rsums"));
+
+  ASSERT(nmat->rowptr[nrows] == mat->rowptr[rstart+nrows]-mat->rowptr[rstart]);
+  if (mat->rowind)
+    nmat->rowind = gk_icopy(mat->rowptr[rstart+nrows]-mat->rowptr[rstart],
+                            mat->rowind+mat->rowptr[rstart],
+                            gk_imalloc(mat->rowptr[rstart+nrows]-mat->rowptr[rstart],
+                                       "gk_csr_ExtractSubmatrix: rowind"));
+  if (mat->rowval)
+    nmat->rowval = gk_fcopy(mat->rowptr[rstart+nrows]-mat->rowptr[rstart],
+                            mat->rowval+mat->rowptr[rstart],
+                            gk_fmalloc(mat->rowptr[rstart+nrows]-mat->rowptr[rstart],
+                                       "gk_csr_ExtractSubmatrix: rowval"));
+
+  return nmat;
+}
+
+
+/*************************************************************************/
+/*! Returns a submatrix containing a certain set of rows.
+    \param mat is the original matrix (its row structure is required).
+    \param nrows is the number of rows to extract.
+    \param rind is the set of row numbers to extract (not range-checked).
+    \returns the row structure of the new submatrix; rowval is NULL if mat has none.
+*/
+/**************************************************************************/
+gk_csr_t *gk_csr_ExtractRows(gk_csr_t *mat, int nrows, int *rind)
+{
+  ssize_t i, ii, j, nnz, len;
+  gk_csr_t *nmat;
+
+  nmat = gk_csr_Create();
+  nmat->nrows = nrows;
+  nmat->ncols = mat->ncols;
+
+  for (nnz=0, i=0; i<nrows; i++)
+    nnz += mat->rowptr[rind[i]+1]-mat->rowptr[rind[i]];
+
+  nmat->rowptr = gk_zmalloc(nmat->nrows+1, "gk_csr_ExtractRows: rowptr");
+  nmat->rowind = gk_imalloc(nnz, "gk_csr_ExtractRows: rowind");
+  nmat->rowval = (mat->rowval ? gk_fmalloc(nnz, "gk_csr_ExtractRows: rowval") : NULL);
+  nmat->rowptr[0] = 0;
+  for (nnz=0, j=0, ii=0; ii<nrows; ii++) {
+    i   = rind[ii];
+    len = mat->rowptr[i+1]-mat->rowptr[i];
+    gk_icopy(len, mat->rowind+mat->rowptr[i], nmat->rowind+nnz);
+    if (mat->rowval)  /* matrices read without values have no rowval to copy */
+      gk_fcopy(len, mat->rowval+mat->rowptr[i], nmat->rowval+nnz);
+    nnz += len;
+    nmat->rowptr[++j] = nnz;
+  }
+  ASSERT(j == nmat->nrows);
+
+  return nmat;
+}
+
+
+/*************************************************************************/
+/*! Returns a submatrix corresponding to a specified partitioning of rows.
+    \param mat is the original matrix (its row structure is required).
+    \param part is the partitioning vector of the rows (one entry per row).
+    \param pid is the partition ID that will be extracted.
+    \returns the row structure of the new submatrix; rowval is NULL if mat has none.
+*/
+/**************************************************************************/
+gk_csr_t *gk_csr_ExtractPartition(gk_csr_t *mat, int *part, int pid)
+{
+  ssize_t i, j, nnz, len;
+  gk_csr_t *nmat;
+
+  nmat = gk_csr_Create();
+  nmat->nrows = 0;
+  nmat->ncols = mat->ncols;
+
+  for (nnz=0, i=0; i<mat->nrows; i++) {  /* first pass: size the submatrix */
+    if (part[i] == pid) {
+      nmat->nrows++;
+      nnz += mat->rowptr[i+1]-mat->rowptr[i];
+    }
+  }
+
+  nmat->rowptr = gk_zmalloc(nmat->nrows+1, "gk_csr_ExtractPartition: rowptr");
+  nmat->rowind = gk_imalloc(nnz, "gk_csr_ExtractPartition: rowind");
+  nmat->rowval = (mat->rowval ? gk_fmalloc(nnz, "gk_csr_ExtractPartition: rowval") : NULL);
+  nmat->rowptr[0] = 0;
+  for (nnz=0, j=0, i=0; i<mat->nrows; i++) {  /* second pass: copy the selected rows */
+    if (part[i] == pid) {
+      len = mat->rowptr[i+1]-mat->rowptr[i];
+      gk_icopy(len, mat->rowind+mat->rowptr[i], nmat->rowind+nnz);
+      if (mat->rowval)  /* matrices read without values have no rowval to copy */
+        gk_fcopy(len, mat->rowval+mat->rowptr[i], nmat->rowval+nnz);
+      nnz += len;
+      nmat->rowptr[++j] = nnz;
+    }
+  }
+  ASSERT(j == nmat->nrows);
+
+  return nmat;
+}
+
+
+/*************************************************************************/
+/*! Splits the matrix into multiple sub-matrices based on the provided
+    color array. Every sub-matrix keeps the dimensions of the original.
+    \param mat is the original matrix.
+    \param color is an array of size equal to the number of non-zeros
+           in the matrix (row-wise structure). The matrix is split into
+           as many parts as the number of colors. For meaningful results,
+           the colors should be numbered consecutively starting from 0.
+    \returns an array of matrices for each supplied color number.
+*/
+/**************************************************************************/
+gk_csr_t **gk_csr_Split(gk_csr_t *mat, int *color)
+{
+  ssize_t i, j;
+  int nrows, ncolors;
+  ssize_t *rowptr;
+  int *rowind;
+  float *rowval;
+  gk_csr_t **smats;
+
+  nrows  = mat->nrows;
+  rowptr = mat->rowptr;
+  rowind = mat->rowind;
+  rowval = mat->rowval;
+
+  ncolors = gk_imax(rowptr[nrows], color, 1)+1;  /* presumably the largest color value, plus one */
+
+  smats = (gk_csr_t **)gk_malloc(sizeof(gk_csr_t *)*ncolors, "gk_csr_Split: smats");
+  for (i=0; i<ncolors; i++) {
+    smats[i] = gk_csr_Create();
+    smats[i]->nrows  = mat->nrows;
+    smats[i]->ncols  = mat->ncols;
+    smats[i]->rowptr = gk_zsmalloc(nrows+1, 0, "gk_csr_Split: smats[i]->rowptr"); 
+  }
+
+  for (i=0; i<nrows; i++) {  /* count, per color, the non-zeros of each row */
+    for (j=rowptr[i]; j<rowptr[i+1]; j++) 
+      smats[color[j]]->rowptr[i]++;
+  }
+  for (i=0; i<ncolors; i++) 
+    MAKECSR(j, nrows, smats[i]->rowptr);  /* presumably turns the counts into row offsets */
+
+  for (i=0; i<ncolors; i++) {
+    smats[i]->rowind = gk_imalloc(smats[i]->rowptr[nrows], "gk_csr_Split: smats[i]->rowind"); 
+    smats[i]->rowval = gk_fmalloc(smats[i]->rowptr[nrows], "gk_csr_Split: smats[i]->rowval"); 
+  }
+
+  for (i=0; i<nrows; i++) {  /* scatter each non-zero into the matrix of its color */
+    for (j=rowptr[i]; j<rowptr[i+1]; j++) {
+      smats[color[j]]->rowind[smats[color[j]]->rowptr[i]] = rowind[j];
+      smats[color[j]]->rowval[smats[color[j]]->rowptr[i]] = rowval[j];
+      smats[color[j]]->rowptr[i]++;  /* rowptr[i] doubles as the insertion cursor */
+    }
+  }
+
+  for (i=0; i<ncolors; i++) 
+    SHIFTCSR(j, nrows, smats[i]->rowptr);  /* presumably undoes the cursor advance above */
+
+  return smats;
+}
+
+
+/**************************************************************************/
+/*! Determines the format of the CSR matrix based on the extension.
+    \param filename is the name of the file; it may be NULL.
+    \param format is the user-supplied format; returned as-is unless AUTO.
+    \returns the type. The extension of the file directly maps to the name of
+           the format; unknown extensions and NULL names give GK_CSR_FMT_CSR.
+*/
+/**************************************************************************/
+int gk_csr_DetermineFormat(char *filename, int format)
+{
+  if (format != GK_CSR_FMT_AUTO)
+    return format;
+  if (filename == NULL)  /* nothing to inspect, e.g., when writing to stdout */
+    return GK_CSR_FMT_CSR;
+
+  format = GK_CSR_FMT_CSR;  /* default for unrecognized extensions */
+  char *extension = gk_getextname(filename);
+  if (!strcmp(extension, "csr"))
+    format = GK_CSR_FMT_CSR;
+  else if (!strcmp(extension, "ijv"))
+    format = GK_CSR_FMT_IJV;
+  else if (!strcmp(extension, "cluto"))
+    format = GK_CSR_FMT_CLUTO;
+  else if (!strcmp(extension, "metis"))
+    format = GK_CSR_FMT_METIS;
+  else if (!strcmp(extension, "binrow"))
+    format = GK_CSR_FMT_BINROW;
+  else if (!strcmp(extension, "bincol"))
+    format = GK_CSR_FMT_BINCOL;
+  else if (!strcmp(extension, "bijv"))
+    format = GK_CSR_FMT_BIJV;
+  gk_free((void **)&extension, LTERM);
+
+  return format;
+}
+
+
+/**************************************************************************/
+/*! Reads a CSR matrix from the supplied file and stores it in the matrix's
+    forward structure.
+    \param filename is the file that stores the data.
+    \param format is either GK_CSR_FMT_METIS, GK_CSR_FMT_CLUTO,
+           GK_CSR_FMT_CSR, GK_CSR_FMT_IJV, GK_CSR_FMT_BIJV, GK_CSR_FMT_BINROW,
+           GK_CSR_FMT_BINCOL, or GK_CSR_FMT_AUTO (deduced from the extension).
+           The GK_CSR_FMT_CSR does not contain a header
+           line, whereas the GK_CSR_FMT_BINROW is a binary format written
+           by gk_csr_Write() using the same format specifier.
+    \param readvals is either 1 or 0, indicating if the file contains
+           values or it does not. It is overridden by the file's header for
+           the GK_CSR_FMT_CLUTO, GK_CSR_FMT_METIS and GK_CSR_FMT_BIJV formats.
+    \param numbering is either 1 or 0, indicating if the numbering of the
+           indices start from 1 or 0, respectively. If they start from 1,
+           they are automatically decremented during input so that they
+           will start from 0. It only applies to GK_CSR_FMT_CSR and
+           GK_CSR_FMT_IJV.
+    \returns the matrix that was read.
+*/
+/**************************************************************************/
+gk_csr_t *gk_csr_Read(char *filename, int format, int readvals, int numbering)
+{
+  ssize_t i, k, l;
+  size_t nfields, nrows, ncols, nnz, fmt, ncon;
+  size_t lnlen;
+  ssize_t *rowptr;
+  int *rowind, *iinds, *jinds, ival;
+  float *rowval=NULL, *vals, fval;
+  int readsizes, readwgts;
+  char *line=NULL, *head, *tail, fmtstr[256];
+  FILE *fpin;
+  gk_csr_t *mat=NULL;
+
+  format = gk_csr_DetermineFormat(filename, format);
+
+  if (!gk_fexists(filename))
+    gk_errexit(SIGERR, "File %s does not exist!\n", filename);
+
+  switch (format) {
+    case GK_CSR_FMT_BINROW:
+      mat = gk_csr_Create();
+
+      fpin = gk_fopen(filename, "rb", "gk_csr_Read: fpin");
+      if (fread(&(mat->nrows), sizeof(int32_t), 1, fpin) != 1)
+        gk_errexit(SIGERR, "Failed to read the nrows from file %s!\n", filename);
+      if (fread(&(mat->ncols), sizeof(int32_t), 1, fpin) != 1)
+        gk_errexit(SIGERR, "Failed to read the ncols from file %s!\n", filename);
+      mat->rowptr = gk_zmalloc(mat->nrows+1, "gk_csr_Read: rowptr");
+      if (fread(mat->rowptr, sizeof(ssize_t), mat->nrows+1, fpin) != mat->nrows+1)
+        gk_errexit(SIGERR, "Failed to read the rowptr from file %s!\n", filename);
+      mat->rowind = gk_imalloc(mat->rowptr[mat->nrows], "gk_csr_Read: rowind");
+      if (fread(mat->rowind, sizeof(int32_t), mat->rowptr[mat->nrows], fpin) != mat->rowptr[mat->nrows])
+        gk_errexit(SIGERR, "Failed to read the rowind from file %s!\n", filename);
+      if (readvals == 1) {
+        mat->rowval = gk_fmalloc(mat->rowptr[mat->nrows], "gk_csr_Read: rowval");
+        if (fread(mat->rowval, sizeof(float), mat->rowptr[mat->nrows], fpin) != mat->rowptr[mat->nrows])
+          gk_errexit(SIGERR, "Failed to read the rowval from file %s!\n", filename);
+      }
+
+      gk_fclose(fpin);
+      return mat;
+
+    case GK_CSR_FMT_BINCOL:
+      mat = gk_csr_Create();
+
+      fpin = gk_fopen(filename, "rb", "gk_csr_Read: fpin");
+      if (fread(&(mat->nrows), sizeof(int32_t), 1, fpin) != 1)
+        gk_errexit(SIGERR, "Failed to read the nrows from file %s!\n", filename);
+      if (fread(&(mat->ncols), sizeof(int32_t), 1, fpin) != 1)
+        gk_errexit(SIGERR, "Failed to read the ncols from file %s!\n", filename);
+      mat->colptr = gk_zmalloc(mat->ncols+1, "gk_csr_Read: colptr");
+      if (fread(mat->colptr, sizeof(ssize_t), mat->ncols+1, fpin) != mat->ncols+1)
+        gk_errexit(SIGERR, "Failed to read the colptr from file %s!\n", filename);
+      mat->colind = gk_imalloc(mat->colptr[mat->ncols], "gk_csr_Read: colind");
+      if (fread(mat->colind, sizeof(int32_t), mat->colptr[mat->ncols], fpin) != mat->colptr[mat->ncols])
+        gk_errexit(SIGERR, "Failed to read the colind from file %s!\n", filename);
+      if (readvals) {
+        mat->colval = gk_fmalloc(mat->colptr[mat->ncols], "gk_csr_Read: colval");
+        if (fread(mat->colval, sizeof(float), mat->colptr[mat->ncols], fpin) != mat->colptr[mat->ncols])
+          gk_errexit(SIGERR, "Failed to read the colval from file %s!\n", filename);
+      }
+
+      gk_fclose(fpin);
+      return mat;
+
+
+    case GK_CSR_FMT_IJV:
+      gk_getfilestats(filename, &nrows, &nnz, NULL, NULL);
+
+      if (readvals == 1 && 3*nrows != nnz)
+        gk_errexit(SIGERR, "Error: The number of numbers (%zu %d) in the input file is not a multiple of 3.\n", nnz, readvals);
+      if (readvals == 0 && 2*nrows != nnz)
+        gk_errexit(SIGERR, "Error: The number of numbers (%zu %d) in the input file is not a multiple of 2.\n", nnz, readvals);
+
+      nnz = nrows;
+      numbering = (numbering ? - 1 : 0);
+
+      /* read the data into three arrays */
+      iinds = gk_i32malloc(nnz, "iinds");
+      jinds = gk_i32malloc(nnz, "jinds");
+      vals  = (readvals ? gk_fmalloc(nnz, "vals") : NULL);
+
+      fpin = gk_fopen(filename, "r", "gk_csr_Read: fpin");
+      for (nrows=0, ncols=0, i=0; i<nnz; i++) {
+        if (readvals) {
+          if (fscanf(fpin, "%d %d %f", &iinds[i], &jinds[i], &vals[i]) != 3)
+            gk_errexit(SIGERR, "Error: Failed to read (i, j, val) for nnz: %zd.\n", i);
+        }
+        else {
+          if (fscanf(fpin, "%d %d", &iinds[i], &jinds[i]) != 2)
+            gk_errexit(SIGERR, "Error: Failed to read (i, j) value for nnz: %zd.\n", i);
+        }
+        iinds[i] += numbering;
+        jinds[i] += numbering;
+        /* a negative index (e.g., 0 with 1-based numbering) would index rowptr[] out of bounds */
+        if (iinds[i] < 0 || jinds[i] < 0)
+          gk_errexit(SIGERR, "Error: Invalid (i, j) index for nnz: %zd.\n", i);
+
+        if (nrows < iinds[i])
+          nrows = iinds[i];
+        if (ncols < jinds[i])
+          ncols = jinds[i];
+      }
+      nrows++;
+      ncols++;
+      gk_fclose(fpin);
+
+      /* convert (i, j, v) into a CSR matrix */
+      mat = gk_csr_Create();
+      mat->nrows = nrows;
+      mat->ncols = ncols;
+      rowptr = mat->rowptr = gk_zsmalloc(nrows+1, 0, "rowptr");
+      rowind = mat->rowind = gk_i32malloc(nnz, "rowind");
+      if (readvals)
+        rowval = mat->rowval = gk_fmalloc(nnz, "rowval");
+
+      for (i=0; i<nnz; i++)
+        rowptr[iinds[i]]++;
+      MAKECSR(i, nrows, rowptr);
+
+      for (i=0; i<nnz; i++) {
+        rowind[rowptr[iinds[i]]] = jinds[i];
+        if (readvals)
+          rowval[rowptr[iinds[i]]] = vals[i];
+        rowptr[iinds[i]]++;
+      }
+      SHIFTCSR(i, nrows, rowptr);
+
+      gk_free((void **)&iinds, &jinds, &vals, LTERM);
+
+      return mat;
+
+    case GK_CSR_FMT_BIJV:
+      mat = gk_csr_Create();
+
+      fpin = gk_fopen(filename, "rb", "gk_csr_Read: fpin");
+
+      if (fread(&(mat->nrows), sizeof(int32_t), 1, fpin) != 1)
+        gk_errexit(SIGERR, "Failed to read the nrows from file %s!\n", filename);
+      if (fread(&(mat->ncols), sizeof(int32_t), 1, fpin) != 1)
+        gk_errexit(SIGERR, "Failed to read the ncols from file %s!\n", filename);
+      if (fread(&nnz, sizeof(size_t), 1, fpin) != 1)
+        gk_errexit(SIGERR, "Failed to read the nnz from file %s!\n", filename);
+      if (fread(&readvals, sizeof(int32_t), 1, fpin) != 1)
+        gk_errexit(SIGERR, "Failed to read the readvals from file %s!\n", filename);
+
+      /* read the data into three arrays */
+      iinds = gk_i32malloc(nnz, "iinds");
+      jinds = gk_i32malloc(nnz, "jinds");
+      vals  = (readvals ? gk_fmalloc(nnz, "vals") : NULL);
+
+      for (i=0; i<nnz; i++) {
+        if (fread(&(iinds[i]), sizeof(int32_t), 1, fpin) != 1)
+          gk_errexit(SIGERR, "Failed to read iinds[i] from file %s!\n", filename);
+        if (fread(&(jinds[i]), sizeof(int32_t), 1, fpin) != 1)
+          gk_errexit(SIGERR, "Failed to read jinds[i] from file %s!\n", filename);
+        if (readvals) {
+          if (fread(&(vals[i]), sizeof(float), 1, fpin) != 1)
+            gk_errexit(SIGERR, "Failed to read vals[i] from file %s!\n", filename);
+        }
+        /* rowptr[] is indexed by iinds[i] below, so the row must be within [0, nrows) */
+        if (iinds[i] < 0 || iinds[i] >= mat->nrows || jinds[i] < 0)
+          gk_errexit(SIGERR, "Invalid (i, j) index (%d, %d) in file %s!\n", iinds[i], jinds[i], filename);
+      }
+      gk_fclose(fpin);
+
+      /* convert (i, j, v) into a CSR matrix */
+      rowptr = mat->rowptr = gk_zsmalloc(mat->nrows+1, 0, "rowptr");
+      rowind = mat->rowind = gk_i32malloc(nnz, "rowind");
+      if (readvals)
+        rowval = mat->rowval = gk_fmalloc(nnz, "rowval");
+
+      for (i=0; i<nnz; i++)
+        rowptr[iinds[i]]++;
+      MAKECSR(i, mat->nrows, rowptr);
+
+      for (i=0; i<nnz; i++) {
+        rowind[rowptr[iinds[i]]] = jinds[i];
+        if (readvals)
+          rowval[rowptr[iinds[i]]] = vals[i];
+        rowptr[iinds[i]]++;
+      }
+      SHIFTCSR(i, mat->nrows, rowptr);
+
+      gk_free((void **)&iinds, &jinds, &vals, LTERM);
+
+      return mat;
+
+
+    /* the following are handled by a common input code, that comes after the switch */
+
+    case GK_CSR_FMT_CLUTO:
+      fpin = gk_fopen(filename, "r", "gk_csr_Read: fpin");
+      do {
+        if (gk_getline(&line, &lnlen, fpin) <= 0)
+          gk_errexit(SIGERR, "Premature end of input file: file:%s\n", filename);
+      } while (line[0] == '%');
+
+      if (sscanf(line, "%zu %zu %zu", &nrows, &ncols, &nnz) != 3)
+        gk_errexit(SIGERR, "Header line must contain 3 integers.\n");
+
+      readsizes = 0;
+      readwgts  = 0;
+      readvals  = 1;
+      numbering = 1;
+
+      break;
+
+    case GK_CSR_FMT_METIS:
+      fpin = gk_fopen(filename, "r", "gk_csr_Read: fpin");
+      do {
+        if (gk_getline(&line, &lnlen, fpin) <= 0)
+          gk_errexit(SIGERR, "Premature end of input file: file:%s\n", filename);
+      } while (line[0] == '%');
+
+      fmt = ncon = 0;
+      nfields = sscanf(line, "%zu %zu %zu %zu", &nrows, &nnz, &fmt, &ncon);
+      if (nfields < 2)
+        gk_errexit(SIGERR, "Header line must contain at least 2 integers (#vtxs and #edges).\n");
+
+      ncols = nrows;
+      nnz *= 2;
+
+      if (fmt > 111)
+        gk_errexit(SIGERR, "Cannot read this type of file format [fmt=%zu]!\n", fmt);
+
+      sprintf(fmtstr, "%03zu", fmt%1000);
+      readsizes = (fmtstr[0] == '1');
+      readwgts  = (fmtstr[1] == '1');
+      readvals  = (fmtstr[2] == '1');
+      numbering = 1;
+      ncon      = (ncon == 0 ? 1 : ncon);
+
+      break;
+
+    case GK_CSR_FMT_CSR:
+      readsizes = 0;
+      readwgts  = 0;
+
+      gk_getfilestats(filename, &nrows, &nnz, NULL, NULL);
+
+      if (readvals == 1 && nnz%2 == 1)
+        gk_errexit(SIGERR, "Error: The number of numbers (%zu %d) in the input file is not even.\n", nnz, readvals);
+      if (readvals == 1)
+        nnz = nnz/2;
+      fpin = gk_fopen(filename, "r", "gk_csr_Read: fpin");
+
+      break;
+
+    default:
+      gk_errexit(SIGERR, "Unknown csr format.\n");
+      return NULL;
+  }
+
+  mat = gk_csr_Create();
+
+  mat->nrows = nrows;
+
+  rowptr = mat->rowptr = gk_zmalloc(nrows+1, "gk_csr_Read: rowptr");
+  rowind = mat->rowind = gk_imalloc(nnz, "gk_csr_Read: rowind");
+  if (readvals != 2)
+    rowval = mat->rowval = gk_fsmalloc(nnz, 1.0, "gk_csr_Read: rowval");
+
+  if (readsizes)
+    mat->rsizes = gk_fsmalloc(nrows, 0.0, "gk_csr_Read: rsizes");
+
+  if (readwgts)
+    mat->rwgts = gk_fsmalloc(nrows*ncon, 0.0, "gk_csr_Read: rwgts");
+
+  /*----------------------------------------------------------------------
+   * Read the sparse matrix file
+   *---------------------------------------------------------------------*/
+  numbering = (numbering ? -1 : 0);
+  for (ncols=0, rowptr[0]=0, k=0, i=0; i<nrows; i++) {
+    do {
+      if (gk_getline(&line, &lnlen, fpin) == -1)
+        gk_errexit(SIGERR, "Premature end of input file: file while reading row %zd\n", i);
+    } while (line[0] == '%');
+
+    head = line;
+    tail = NULL;
+
+    /* Read vertex sizes */
+    if (readsizes) {
+#ifdef __MSC__
+      mat->rsizes[i] = (float)strtod(head, &tail);
+#else
+      mat->rsizes[i] = strtof(head, &tail);
+#endif
+      if (tail == head)
+        gk_errexit(SIGERR, "The line for vertex %zd does not have size information\n", i+1);
+      if (mat->rsizes[i] < 0)
+        errexit("The size for vertex %zd must be >= 0\n", i+1);
+      head = tail;
+    }
+
+    /* Read vertex weights */
+    if (readwgts) {
+      for (l=0; l<ncon; l++) {
+#ifdef __MSC__
+        mat->rwgts[i*ncon+l] = (float)strtod(head, &tail);
+#else
+        mat->rwgts[i*ncon+l] = strtof(head, &tail);
+#endif
+        if (tail == head)
+          errexit("The line for vertex %zd does not have enough weights "
+                  "for the %zu constraints.\n", i+1, ncon);
+        if (mat->rwgts[i*ncon+l] < 0)
+          errexit("The weight vertex %zd and constraint %zd must be >= 0\n", i+1, l);
+        head = tail;
+      }
+    }
+
+
+    /* Read the rest of the row */
+    while (1) {
+      ival = (int)strtol(head, &tail, 0);
+      if (tail == head)
+        break;
+      head = tail;
+
+      /* never write past the nnz entries that rowind/rowval were sized for */
+      if ((size_t)k >= nnz)
+        gk_errexit(SIGERR, "Error: Row %zd has more entries than the expected NNZ=%zu.\n", i, nnz);
+      if ((rowind[k] = ival + numbering) < 0)
+        gk_errexit(SIGERR, "Error: Invalid column number %d at row %zd.\n", ival, i);
+
+      ncols = gk_max(rowind[k], ncols);
+
+      if (readvals == 1) {
+#ifdef __MSC__
+        fval = (float)strtod(head, &tail);
+#else
+        fval = strtof(head, &tail);
+#endif
+        if (tail == head)
+          gk_errexit(SIGERR, "Value could not be found for column! Row:%zd, NNZ:%zd\n", i, k);
+        head = tail;
+
+        rowval[k] = fval;
+      }
+      k++;
+    }
+    rowptr[i+1] = k;
+  }
+
+  if (format == GK_CSR_FMT_METIS) {
+    ASSERT(ncols+1 == mat->nrows);
+    mat->ncols = mat->nrows;
+  }
+  else {
+    mat->ncols = ncols+1;
+  }
+
+  if ((size_t)k != nnz)
+    gk_errexit(SIGERR, "gk_csr_Read: Something wrong with the number of nonzeros in "
+                       "the input file. NNZ=%zu, ActualNNZ=%zd.\n", nnz, k);
+
+  gk_fclose(fpin);
+
+  gk_free((void **)&line, LTERM);
+
+  return mat;
+}
+
+
+/**************************************************************************/
+/*! Writes the row-based (column-based for BINCOL) structure of a matrix into a file.
+    \param mat is the matrix to be written,
+    \param filename is the name of the output file. If it is NULL, the text
+           CSR, CLUTO and METIS formats go to stdout; others require a name.
+    \param format is one of: GK_CSR_FMT_CLUTO, GK_CSR_FMT_CSR, GK_CSR_FMT_METIS,
+           GK_CSR_FMT_IJV, GK_CSR_FMT_BINROW, GK_CSR_FMT_BINCOL, GK_CSR_FMT_BIJV.
+    \param writevals is either 1 or 0 indicating if the values will be
+           written or not. METIS never writes them and CLUTO always does.
+    \param numbering is either 1 or 0 indicating if the internal 0-based
+           numbering will be shifted by one or not during output. This
+           is only applicable to GK_CSR_FMT_CSR and GK_CSR_FMT_IJV.
+*/
+/**************************************************************************/
+void gk_csr_Write(gk_csr_t *mat, char *filename, int format, int writevals, int numbering)
+{
+  ssize_t i, j;
+  int32_t edge[2];
+  FILE *fpout;
+
+  /* with no filename (stdout) there is no extension to inspect; AUTO then means text CSR */
+  if (filename != NULL || format != GK_CSR_FMT_AUTO)
+    format = gk_csr_DetermineFormat(filename, format);
+
+  switch (format) {
+    case GK_CSR_FMT_METIS:
+      if (mat->nrows != mat->ncols || mat->rowptr[mat->nrows]%2 == 1)
+        gk_errexit(SIGERR, "METIS output format requires a square symmetric matrix.\n");
+
+      fpout = (filename ? gk_fopen(filename, "w", "gk_csr_Write: fpout") : stdout);
+
+      fprintf(fpout, "%d %zd\n", mat->nrows, mat->rowptr[mat->nrows]/2);
+      for (i=0; i<mat->nrows; i++) {
+        for (j=mat->rowptr[i]; j<mat->rowptr[i+1]; j++)
+          fprintf(fpout, " %d", mat->rowind[j]+1);
+        fprintf(fpout, "\n");
+      }
+      if (fflush(fpout) != 0 || ferror(fpout))
+        gk_errexit(SIGERR, "Failed to write the matrix to %s!\n", (filename ? filename : "stdout"));
+      if (filename)
+        gk_fclose(fpout);
+      break;
+
+    case GK_CSR_FMT_BINROW:
+      if (filename == NULL)
+        gk_errexit(SIGERR, "The filename parameter cannot be NULL.\n");
+      fpout = gk_fopen(filename, "wb", "gk_csr_Write: fpout");
+
+      fwrite(&(mat->nrows), sizeof(int32_t), 1, fpout);
+      fwrite(&(mat->ncols), sizeof(int32_t), 1, fpout);
+      fwrite(mat->rowptr, sizeof(ssize_t), mat->nrows+1, fpout);
+      fwrite(mat->rowind, sizeof(int32_t), mat->rowptr[mat->nrows], fpout);
+      if (writevals)
+        fwrite(mat->rowval, sizeof(float), mat->rowptr[mat->nrows], fpout);
+
+      if (fflush(fpout) != 0 || ferror(fpout))  /* catches any failed fwrite above */
+        gk_errexit(SIGERR, "Failed to write the matrix to %s!\n", filename);
+      gk_fclose(fpout);
+      break;
+
+    case GK_CSR_FMT_BINCOL:
+      if (filename == NULL)
+        gk_errexit(SIGERR, "The filename parameter cannot be NULL.\n");
+      fpout = gk_fopen(filename, "wb", "gk_csr_Write: fpout");
+
+      fwrite(&(mat->nrows), sizeof(int32_t), 1, fpout);
+      fwrite(&(mat->ncols), sizeof(int32_t), 1, fpout);
+      fwrite(mat->colptr, sizeof(ssize_t), mat->ncols+1, fpout);
+      fwrite(mat->colind, sizeof(int32_t), mat->colptr[mat->ncols], fpout);
+      if (writevals)
+        fwrite(mat->colval, sizeof(float), mat->colptr[mat->ncols], fpout);
+
+      if (fflush(fpout) != 0 || ferror(fpout))  /* catches any failed fwrite above */
+        gk_errexit(SIGERR, "Failed to write the matrix to %s!\n", filename);
+      gk_fclose(fpout);
+      break;
+
+    case GK_CSR_FMT_IJV:
+      if (filename == NULL)
+        gk_errexit(SIGERR, "The filename parameter cannot be NULL.\n");
+      fpout = gk_fopen(filename, "w", "gk_csr_Write: fpout");
+
+      numbering = (numbering ? 1 : 0);
+      for (i=0; i<mat->nrows; i++) {
+        for (j=mat->rowptr[i]; j<mat->rowptr[i+1]; j++) {
+          if (writevals)
+            fprintf(fpout, "%zd %d %.8f\n", i+numbering, mat->rowind[j]+numbering, mat->rowval[j]);
+          else
+            fprintf(fpout, "%zd %d\n", i+numbering, mat->rowind[j]+numbering);
+        }
+      }
+
+      if (fflush(fpout) != 0 || ferror(fpout))  /* catches any failed fprintf above */
+        gk_errexit(SIGERR, "Failed to write the matrix to %s!\n", filename);
+      gk_fclose(fpout);
+      break;
+
+    case GK_CSR_FMT_BIJV:
+      if (filename == NULL)
+        gk_errexit(SIGERR, "The filename parameter cannot be NULL.\n");
+      fpout = gk_fopen(filename, "wb", "gk_csr_Write: fpout");
+
+      fwrite(&(mat->nrows), sizeof(int32_t), 1, fpout);
+      fwrite(&(mat->ncols), sizeof(int32_t), 1, fpout);
+      fwrite(&(mat->rowptr[mat->nrows]), sizeof(size_t), 1, fpout);
+      fwrite(&writevals, sizeof(int32_t), 1, fpout);
+
+      for (i=0; i<mat->nrows; i++) {
+        edge[0] = i;
+        for (j=mat->rowptr[i]; j<mat->rowptr[i+1]; j++) {
+          edge[1] = mat->rowind[j];
+          fwrite(edge, sizeof(int32_t), 2, fpout);
+          if (writevals)
+            fwrite(&(mat->rowval[j]), sizeof(float), 1, fpout);
+        }
+      }
+
+      if (fflush(fpout) != 0 || ferror(fpout))  /* catches any failed fwrite above */
+        gk_errexit(SIGERR, "Failed to write the matrix to %s!\n", filename);
+      gk_fclose(fpout);
+      break;
+
+    default:
+      fpout = (filename ? gk_fopen(filename, "w", "gk_csr_Write: fpout") : stdout);
+
+      if (format == GK_CSR_FMT_CLUTO) {
+        fprintf(fpout, "%d %d %zd\n", mat->nrows, mat->ncols, mat->rowptr[mat->nrows]);
+        writevals = 1;
+        numbering = 1;
+      }
+
+      for (i=0; i<mat->nrows; i++) {
+        for (j=mat->rowptr[i]; j<mat->rowptr[i+1]; j++) {
+          fprintf(fpout, " %d", mat->rowind[j]+(numbering ? 1 : 0));
+          if (writevals)
+            fprintf(fpout, " %f", mat->rowval[j]);
+        }
+        fprintf(fpout, "\n");
+      }
+      if (fflush(fpout) != 0 || ferror(fpout))
+        gk_errexit(SIGERR, "Failed to write the matrix to %s!\n", (filename ? filename : "stdout"));
+      if (filename)
+        gk_fclose(fpout);
+  }
+}
+
+
+/*************************************************************************/
+/*! Prunes certain rows/columns of the matrix. The pruning is driven by the
+    row structure of the matrix: rows/columns are removed, but the numbering
+    of the remaining rows/columns is not affected. A matrix without values
+    (mat->rowval == NULL) is accepted; the result then has no values either.
+   
+    \param mat the matrix to be pruned,
+    \param what indicates if the rows (GK_CSR_ROW) or the columns (GK_CSR_COL)
+           of the matrix will be pruned,
+    \param minf is the minimum number of rows (columns) that a column (row) must
+           be present in order to be kept,
+    \param maxf is the maximum number of rows (columns) that a column (row) must
+          be present at in order to be kept.
+    \returns the pruned matrix consisting only of its row-based structure. 
+          The input matrix is not modified. 
+*/
+/**************************************************************************/
+gk_csr_t *gk_csr_Prune(gk_csr_t *mat, int what, int minf, int maxf)
+{
+  ssize_t i, j, nnz;
+  int nrows, ncols;
+  ssize_t *rowptr, *nrowptr;
+  int *rowind, *nrowind, *collen;
+  float *rowval, *nrowval;
+  gk_csr_t *nmat;
+
+  nmat = gk_csr_Create();
+  
+  nrows = nmat->nrows = mat->nrows;
+  ncols = nmat->ncols = mat->ncols;
+
+  rowptr = mat->rowptr;
+  rowind = mat->rowind;
+  rowval = mat->rowval;
+
+  /* size the output for the worst case in which nothing gets pruned; the
+     values array is only created when the input actually has values */
+  nrowptr = nmat->rowptr = gk_zmalloc(nrows+1, "gk_csr_Prune: nrowptr");
+  nrowind = nmat->rowind = gk_imalloc(rowptr[nrows], "gk_csr_Prune: nrowind");
+  nrowval = nmat->rowval = (rowval ? gk_fmalloc(rowptr[nrows], "gk_csr_Prune: nrowval") : NULL);
+  switch (what) {
+    case GK_CSR_COL:
+      collen = gk_ismalloc(ncols, 0, "gk_csr_Prune: collen");
+      /* count the number of rows in which each column appears */
+      for (i=0; i<nrows; i++) {
+        for (j=rowptr[i]; j<rowptr[i+1]; j++) {
+          ASSERT(rowind[j] < ncols);
+          collen[rowind[j]]++;
+        }
+      }
+      for (i=0; i<ncols; i++)  /* turn the counts into keep (1) / drop (0) flags */
+        collen[i] = (collen[i] >= minf && collen[i] <= maxf ? 1 : 0);
+
+      nrowptr[0] = 0;
+      for (nnz=0, i=0; i<nrows; i++) {
+        for (j=rowptr[i]; j<rowptr[i+1]; j++) {
+          if (collen[rowind[j]]) {
+            nrowind[nnz] = rowind[j];
+            if (rowval) nrowval[nnz] = rowval[j];
+            nnz++;
+          }
+        }
+        nrowptr[i+1] = nnz;
+      }
+      gk_free((void **)&collen, LTERM);
+      break;
+
+    case GK_CSR_ROW:
+      nrowptr[0] = 0;
+      for (nnz=0, i=0; i<nrows; i++) {
+        if (rowptr[i+1]-rowptr[i] >= minf && rowptr[i+1]-rowptr[i] <= maxf) {
+          for (j=rowptr[i]; j<rowptr[i+1]; j++, nnz++) {
+            nrowind[nnz] = rowind[j];
+            if (rowval) nrowval[nnz] = rowval[j];
+          }
+        }
+        nrowptr[i+1] = nnz;
+      }
+      break;
+
+    default:
+      gk_csr_Free(&nmat);
+      gk_errexit(SIGERR, "Unknown prunning type of %d\n", what);
+      return NULL;
+  }
+
+  return nmat;
+}
+
+
+/*************************************************************************/
+/*! Eliminates certain entries from the rows/columns of the matrix. The 
+    filtering takes place by keeping only the highest weight entries whose
+    sum accounts for a certain fraction of the overall weight of the 
+    row/column.
+   
+    \param mat the matrix to be filtered,
+    \param what indicates if the rows (GK_CSR_ROW) or the columns (GK_CSR_COL)
+           of the matrix will be filtered,
+    \param norm indicates the norm that will be used to aggregate the weights
+           and possible values are 1 or 2,
+    \param fraction is the fraction of the overall norm that will be retained
+           by the kept entries.
+    \returns the filtered matrix consisting only of its row-based structure. 
+           The input matrix is not modified. 
+*/
+/**************************************************************************/
+gk_csr_t *gk_csr_LowFilter(gk_csr_t *mat, int what, int norm, float fraction)
+{
+  ssize_t i, j, nnz;
+  int nrows, ncols, ncand, maxlen=0;
+  ssize_t *rowptr, *colptr, *nrowptr;
+  int *rowind, *colind, *nrowind;
+  float *rowval, *colval, *nrowval, rsum, tsum;
+  gk_csr_t *nmat;
+  gk_fkv_t *cand;
+
+  nmat = gk_csr_Create();
+  
+  nrows = nmat->nrows = mat->nrows;
+  ncols = nmat->ncols = mat->ncols;
+
+  rowptr = mat->rowptr;
+  rowind = mat->rowind;
+  rowval = mat->rowval;
+  colptr = mat->colptr;
+  colind = mat->colind;
+  colval = mat->colval;
+
+  nrowptr = nmat->rowptr = gk_zmalloc(nrows+1, "gk_csr_LowFilter: nrowptr");
+  nrowind = nmat->rowind = gk_imalloc(rowptr[nrows], "gk_csr_LowFilter: nrowind");
+  nrowval = nmat->rowval = gk_fmalloc(rowptr[nrows], "gk_csr_LowFilter: nrowval");
+  /* the output is sized for the worst case in which every entry is kept */
+
+  switch (what) {
+    case GK_CSR_COL:
+      if (mat->colptr == NULL) 
+        gk_errexit(SIGERR, "Cannot filter columns when column-based structure has not been created.\n");
+
+      gk_zcopy(nrows+1, rowptr, nrowptr);  /* nrowptr[r]: next free slot of row r */
+      for (i=0; i<ncols; i++) 
+        maxlen = gk_max(maxlen, colptr[i+1]-colptr[i]);
+      /* Columns are gathered/sorted in parallel; the scatter is 'ordered' because
+         columns owned by different threads advance the same per-row cursors. */
+      #pragma omp parallel private(i, j, ncand, rsum, tsum, cand)
+      {
+        cand = gk_fkvmalloc(maxlen, "gk_csr_LowFilter: cand");
+        /* static,1 interleaves the columns so the threads can pipeline the work */
+        #pragma omp for ordered schedule(static,1)
+        for (i=0; i<ncols; i++) {
+          for (tsum=0.0, ncand=0, j=colptr[i]; j<colptr[i+1]; j++, ncand++) {
+            cand[ncand].val = colind[j];
+            cand[ncand].key = colval[j];
+            tsum += (norm == 1 ? colval[j] : colval[j]*colval[j]);
+          }
+          gk_fkvsortd(ncand, cand);
+          #pragma omp ordered
+          for (rsum=0.0, j=0; j<ncand && rsum<=fraction*tsum; j++) {
+            rsum += (norm == 1 ? cand[j].key : cand[j].key*cand[j].key);
+            nrowind[nrowptr[cand[j].val]] = i;
+            nrowval[nrowptr[cand[j].val]] = cand[j].key;
+            nrowptr[cand[j].val]++;
+          }
+        }
+
+        gk_free((void **)&cand, LTERM);
+      }
+
+      /* compact the nrowind/nrowval */
+      for (nnz=0, i=0; i<nrows; i++) {
+        for (j=rowptr[i]; j<nrowptr[i]; j++, nnz++) {
+          nrowind[nnz] = nrowind[j];
+          nrowval[nnz] = nrowval[j];
+        }
+        nrowptr[i] = nnz;
+      }
+      SHIFTCSR(i, nrows, nrowptr);
+
+      break;
+
+    case GK_CSR_ROW:
+      if (mat->rowptr == NULL) 
+        gk_errexit(SIGERR, "Cannot filter rows when row-based structure has not been created.\n");
+
+      for (i=0; i<nrows; i++) 
+        maxlen = gk_max(maxlen, rowptr[i+1]-rowptr[i]);
+
+      #pragma omp parallel private(i, j, ncand, rsum, tsum, cand)
+      {
+        cand = gk_fkvmalloc(maxlen, "gk_csr_LowFilter: cand");
+        /* each row writes only its own slice of the output: no ordering needed */
+        #pragma omp for schedule(static)
+        for (i=0; i<nrows; i++) {
+          for (tsum=0.0, ncand=0, j=rowptr[i]; j<rowptr[i+1]; j++, ncand++) {
+            cand[ncand].val = rowind[j];
+            cand[ncand].key = rowval[j];
+            tsum += (norm == 1 ? rowval[j] : rowval[j]*rowval[j]);
+          }
+          gk_fkvsortd(ncand, cand);
+
+          for (rsum=0.0, j=0; j<ncand && rsum<=fraction*tsum; j++) {
+            rsum += (norm == 1 ? cand[j].key : cand[j].key*cand[j].key);
+            nrowind[rowptr[i]+j] = cand[j].val;
+            nrowval[rowptr[i]+j] = cand[j].key;
+          }
+          nrowptr[i+1] = rowptr[i]+j;
+        }
+
+        gk_free((void **)&cand, LTERM);
+      }
+
+      /* compact nrowind/nrowval */
+      nrowptr[0] = nnz = 0;
+      for (i=0; i<nrows; i++) {
+        for (j=rowptr[i]; j<nrowptr[i+1]; j++, nnz++) {
+          nrowind[nnz] = nrowind[j];
+          nrowval[nnz] = nrowval[j];
+        }
+        nrowptr[i+1] = nnz;
+      }
+
+      break;
+
+    default:
+      gk_csr_Free(&nmat);
+      gk_errexit(SIGERR, "Unknown prunning type of %d\n", what);
+      return NULL;
+  }
+
+  return nmat;
+}
+
+
+/*************************************************************************/
+/*! Eliminates certain entries from the rows/columns of the matrix. The 
+    filtering takes place by keeping only the highest weight top-K entries 
+    along each row/column and those entries whose weight is at least
+    a specified value.
+   
+    \param mat the matrix to be filtered,
+    \param what indicates if the rows (GK_CSR_ROW) or the columns (GK_CSR_COL)
+           of the matrix will be filtered,
+    \param topk is the number of the highest weight entries to keep.
+    \param keepval is the weight of a term at or above which it will be kept.
+           This is used to select additional terms past the first topk.
+    \returns the filtered matrix consisting only of its row-based structure. 
+           The input matrix is not modified. 
+*/
+/**************************************************************************/
+gk_csr_t *gk_csr_TopKPlusFilter(gk_csr_t *mat, int what, int topk, float keepval)
+{
+  ssize_t i, j, k, nnz;
+  int nrows, ncols, ncand;
+  ssize_t *rowptr, *colptr, *nrowptr;
+  int *rowind, *colind, *nrowind;
+  float *rowval, *colval, *nrowval;
+  gk_csr_t *nmat;
+  gk_fkv_t *cand;
+
+  nmat = gk_csr_Create();
+  
+  nrows = nmat->nrows = mat->nrows;
+  ncols = nmat->ncols = mat->ncols;
+
+  rowptr = mat->rowptr;
+  rowind = mat->rowind;
+  rowval = mat->rowval;
+  colptr = mat->colptr;
+  colind = mat->colind;
+  colval = mat->colval;
+
+  nrowptr = nmat->rowptr = gk_zmalloc(nrows+1, "gk_csr_TopKPlusFilter: nrowptr");
+  nrowind = nmat->rowind = gk_imalloc(rowptr[nrows], "gk_csr_TopKPlusFilter: nrowind");
+  nrowval = nmat->rowval = gk_fmalloc(rowptr[nrows], "gk_csr_TopKPlusFilter: nrowval");
+  /* the output is sized for the worst case in which every entry is kept */
+
+  switch (what) {
+    case GK_CSR_COL:
+      if (mat->colptr == NULL) 
+        gk_errexit(SIGERR, "Cannot filter columns when column-based structure has not been created.\n");
+
+      cand = gk_fkvmalloc(nrows, "gk_csr_TopKPlusFilter: cand");
+
+      gk_zcopy(nrows+1, rowptr, nrowptr);
+      for (i=0; i<ncols; i++) {
+        for (ncand=0, j=colptr[i]; j<colptr[i+1]; j++, ncand++) {
+          cand[ncand].val = colind[j];
+          cand[ncand].key = colval[j];
+        }
+        gk_fkvsortd(ncand, cand);
+
+        /* Walk the candidates in their sorted order. The first k of them
+           are always kept; past those the walk stops at the first weight
+           that is below keepval. Every kept entry is written to the next
+           free slot of its row (cand[j].val), which nrowptr tracks since
+           it was initialized as a copy of rowptr above. */
+        k = gk_min(topk, ncand);
+        for (j=0; j<ncand; j++) {
+          if (j >= k && cand[j].key < keepval) 
+            break;
+
+          nrowind[nrowptr[cand[j].val]] = i;
+          nrowval[nrowptr[cand[j].val]] = cand[j].key;
+          nrowptr[cand[j].val]++;
+        }
+      }
+
+      /* compact the nrowind/nrowval */
+      for (nnz=0, i=0; i<nrows; i++) {
+        for (j=rowptr[i]; j<nrowptr[i]; j++, nnz++) {
+          nrowind[nnz] = nrowind[j];
+          nrowval[nnz] = nrowval[j];
+        }
+        nrowptr[i] = nnz;
+      }
+      SHIFTCSR(i, nrows, nrowptr);
+
+      gk_free((void **)&cand, LTERM);
+      break;
+
+    case GK_CSR_ROW:
+      if (mat->rowptr == NULL) 
+        gk_errexit(SIGERR, "Cannot filter rows when row-based structure has not been created.\n");
+
+      cand = gk_fkvmalloc(ncols, "gk_csr_TopKPlusFilter: cand");
+
+      nrowptr[0] = 0;
+      for (nnz=0, i=0; i<nrows; i++) {
+        for (ncand=0, j=rowptr[i]; j<rowptr[i+1]; j++, ncand++) {
+          cand[ncand].val = rowind[j];
+          cand[ncand].key = rowval[j];
+        }
+        gk_fkvsortd(ncand, cand);
+
+        /* Same selection rule as in the column case: the first k sorted
+           candidates are always kept and the walk stops at the first later
+           weight below keepval. The output is appended row by row, so nnz
+           is both the running entry count and the write position. */
+        k = gk_min(topk, ncand);
+        for (j=0; j<ncand; j++, nnz++) {
+          if (j >= k && cand[j].key < keepval) 
+            break;
+
+          nrowind[nnz] = cand[j].val;
+          nrowval[nnz] = cand[j].key;
+        }
+        nrowptr[i+1] = nnz;
+      }
+
+      gk_free((void **)&cand, LTERM);
+      break;
+
+    default:
+      gk_csr_Free(&nmat);
+      gk_errexit(SIGERR, "Unknown prunning type of %d\n", what);
+      return NULL;
+  }
+
+  return nmat;
+}
+
+
+/*************************************************************************/
+/*! Eliminates certain entries from the rows/columns of the matrix. The 
+    filtering takes place by keeping only the terms whose contribution to
+    the total length of the document is greater than a user-supplied multiple
+    over the average.
+
+    This routine assumes that the vectors are normalized to be unit length.
+   
+    \param mat the matrix to be filtered,
+    \param what indicates if the rows (GK_CSR_ROW) or the columns (GK_CSR_COL)
+           of the matrix will be filtered,
+    \param zscore is the multiplicative factor over the average contribution 
+           to the length of the document.
+    \returns the filtered matrix consisting only of its row-based structure. 
+           The input matrix is not modified. 
+*/
+/**************************************************************************/
+gk_csr_t *gk_csr_ZScoreFilter(gk_csr_t *mat, int what, float zscore)
+{
+  ssize_t row, pos, kept;
+  int nrows;
+  ssize_t *rowptr, *nrowptr;
+  int *rowind, *nrowind;
+  float *rowval, *nrowval, cutoff;
+  gk_csr_t *nmat;
+
+  /* the result starts out as a fresh matrix with the dimensions of the input */
+  nmat = gk_csr_Create();
+  nmat->nrows = mat->nrows;
+  nmat->ncols = mat->ncols;
+  nrows  = mat->nrows; 
+  rowptr = mat->rowptr;
+  rowind = mat->rowind;
+  rowval = mat->rowval;
+
+  /* the output arrays are sized as if every entry of the input were kept */
+  nrowptr = nmat->rowptr = gk_zmalloc(nrows+1, "gk_csr_ZScoreFilter: nrowptr");
+  nrowind = nmat->rowind = gk_imalloc(rowptr[nrows], "gk_csr_ZScoreFilter: nrowind");
+  nrowval = nmat->rowval = gk_fmalloc(rowptr[nrows], "gk_csr_ZScoreFilter: nrowval");
+
+  if (what == GK_CSR_COL) {
+    gk_errexit(SIGERR, "This has not been implemented yet.\n");
+  }
+  else if (what == GK_CSR_ROW) {
+    if (mat->rowptr == NULL) 
+      gk_errexit(SIGERR, "Cannot filter rows when row-based structure has not been created.\n");
+
+    /* 'kept' counts the entries written so far and doubles as the write
+       position in the output arrays */
+    kept = 0;
+    nrowptr[0] = 0;
+    for (row=0; row<nrows; row++) {
+      /* threshold of this row: zscore divided by the row's entry count */
+      cutoff = zscore/(rowptr[row+1]-rowptr[row]);
+      for (pos=rowptr[row]; pos<rowptr[row+1]; pos++) {
+        if (!(rowval[pos] > cutoff))
+          continue;
+        nrowind[kept] = rowind[pos];
+        nrowval[kept] = rowval[pos];
+        kept++;
+      }
+      nrowptr[row+1] = kept;
+    }
+  }
+  else {
+    gk_csr_Free(&nmat);
+    gk_errexit(SIGERR, "Unknown prunning type of %d\n", what);
+    return NULL;
+  }
+
+  return nmat;
+}
+
+
+/*************************************************************************/
+/*! Compacts the column-space of the matrix by removing empty columns.
+    As a result of the compaction, the column numbers are renumbered. 
+    The compaction operation is done in place and only affects the row-based
+    representation of the matrix.
+    The new columns are ordered in decreasing frequency.
+   
+    \param mat the matrix whose empty columns will be removed.
+*/
+/**************************************************************************/
+void gk_csr_CompactColumns(gk_csr_t *mat)
+{
+  ssize_t k, nnz;
+  int nrows, ncols, nkept;
+  ssize_t *rowptr;
+  int *rowind, *newid;
+  gk_ikv_t *freq;
+
+  nrows  = mat->nrows;
+  ncols  = mat->ncols;
+  rowptr = mat->rowptr;
+  rowind = mat->rowind;
+
+  newid = gk_imalloc(ncols, "gk_csr_CompactColumns: colmap");
+  freq  = gk_ikvmalloc(ncols, "gk_csr_CompactColumns: clens");
+
+  /* freq[c] pairs the number of entries found in column c (key) with c (val) */
+  for (k=0; k<ncols; k++) {
+    freq[k].val = k;
+    freq[k].key = 0;
+  }
+  nnz = rowptr[nrows];
+  for (k=0; k<nnz; k++) 
+    freq[rowind[k]].key++;
+
+  /* once sorted, the columns are handed consecutive new ids in sorted order;
+     the hand-out stops at the first column that has no entries */
+  gk_ikvsortd(ncols, freq);
+  nkept = 0;
+  for (k=0; k<ncols && freq[k].key > 0; k++) 
+    newid[freq[k].val] = nkept++;
+
+  /* relabel the column indices of the row structure and shrink ncols */
+  for (k=0; k<nnz; k++) 
+    rowind[k] = newid[rowind[k]];
+  mat->ncols = nkept;
+
+  gk_free((void **)&newid, &freq, LTERM);
+}
+
+
+/*************************************************************************/
+/*! Sorts the indices in increasing order; values, if any, move with them
+    (a matrix whose rowval/colval is NULL is handled as well).
+    \param mat the matrix itself,
+    \param what is either GK_CSR_ROW or GK_CSR_COL indicating which set of
+           indices to sort.
+*/
+/**************************************************************************/
+void gk_csr_SortIndices(gk_csr_t *mat, int what)
+{
+  int n, nn=0;
+  ssize_t *ptr;
+  int *ind;
+  float *val;
+
+  switch (what) {
+    case GK_CSR_ROW:
+      if (!mat->rowptr)
+        gk_errexit(SIGERR, "Row-based view of the matrix does not exists.\n");
+
+      n   = mat->nrows;
+      ptr = mat->rowptr;
+      ind = mat->rowind;
+      val = mat->rowval;
+      break;
+
+    case GK_CSR_COL:
+      if (!mat->colptr)
+        gk_errexit(SIGERR, "Column-based view of the matrix does not exists.\n");
+
+      n   = mat->ncols;
+      ptr = mat->colptr;
+      ind = mat->colind;
+      val = mat->colval;
+      break;
+
+    default:
+      gk_errexit(SIGERR, "Invalid index type of %d.\n", what);
+      return;
+  }
+
+  #pragma omp parallel if (n > 100)
+  {
+    ssize_t i, j, k;
+    gk_ikv_t *cand;
+    float *tval;
+    /* nn = length of the longest row/column; it sizes the scratch arrays */
+    #pragma omp single
+    for (i=0; i<n; i++) 
+      nn = gk_max(nn, ptr[i+1]-ptr[i]);
+  
+    cand = gk_ikvmalloc(nn, "gk_csr_SortIndices: cand");
+    tval = gk_fmalloc(nn, "gk_csr_SortIndices: tval");
+    /* rows/columns that are already in order (k stays 0) are left untouched */
+    #pragma omp for schedule(static)
+    for (i=0; i<n; i++) {
+      for (k=0, j=ptr[i]; j<ptr[i+1]; j++) {
+        if (j > ptr[i] && ind[j] < ind[j-1])
+          k = 1; /* an inversion */
+        cand[j-ptr[i]].val = j-ptr[i];
+        cand[j-ptr[i]].key = ind[j];
+        if (val) tval[j-ptr[i]] = val[j];  /* value-less matrices have val == NULL */
+      }
+      if (k) {
+        gk_ikvsorti(ptr[i+1]-ptr[i], cand);
+        for (j=ptr[i]; j<ptr[i+1]; j++) {
+          ind[j] = cand[j-ptr[i]].key;
+          if (val) val[j] = tval[cand[j-ptr[i]].val];  /* value follows its index */
+        }
+      }
+    }
+
+    gk_free((void **)&cand, &tval, LTERM);
+  }
+}
+
+
+/*************************************************************************/
+/*! Creates a row/column index from the column/row data.
+    \param mat the matrix itself,
+    \param what is either GK_CSR_ROW or GK_CSR_COL indicating which index
+           will be created.
+*/
+/**************************************************************************/
+void gk_csr_CreateIndex(gk_csr_t *mat, int what)
+{
+  /* 'f' stands for forward (the existing view that is read), 'r' stands for reverse (the view that is built) */
+  ssize_t i, j, k, nf, nr;
+  ssize_t *fptr, *rptr;
+  int *find, *rind;
+  float *fval, *rval;
+
+  switch (what) {
+    case GK_CSR_COL:  /* build the column view out of the row view */
+      nf   = mat->nrows;
+      fptr = mat->rowptr;
+      find = mat->rowind;
+      fval = mat->rowval;
+
+      if (mat->colptr) gk_free((void **)&mat->colptr, LTERM);  /* discard a previously built column view */
+      if (mat->colind) gk_free((void **)&mat->colind, LTERM);
+      if (mat->colval) gk_free((void **)&mat->colval, LTERM);
+
+      nr   = mat->ncols;
+      rptr = mat->colptr = gk_zsmalloc(nr+1, 0, "gk_csr_CreateIndex: rptr");  /* presumably pre-set to 0 (second argument); used as counters below */
+      rind = mat->colind = gk_imalloc(fptr[nf], "gk_csr_CreateIndex: rind");
+      rval = mat->colval = (fval ? gk_fmalloc(fptr[nf], "gk_csr_CreateIndex: rval") : NULL);  /* values only if the forward view has them */
+      break;
+    case GK_CSR_ROW:  /* build the row view out of the column view */
+      nf   = mat->ncols;
+      fptr = mat->colptr;
+      find = mat->colind;
+      fval = mat->colval;
+
+      if (mat->rowptr) gk_free((void **)&mat->rowptr, LTERM);  /* discard a previously built row view */
+      if (mat->rowind) gk_free((void **)&mat->rowind, LTERM);
+      if (mat->rowval) gk_free((void **)&mat->rowval, LTERM);
+
+      nr   = mat->nrows;
+      rptr = mat->rowptr = gk_zsmalloc(nr+1, 0, "gk_csr_CreateIndex: rptr");  /* presumably pre-set to 0 (second argument); used as counters below */
+      rind = mat->rowind = gk_imalloc(fptr[nf], "gk_csr_CreateIndex: rind");
+      rval = mat->rowval = (fval ? gk_fmalloc(fptr[nf], "gk_csr_CreateIndex: rval") : NULL);  /* values only if the forward view has them */
+      break;
+    default:
+      gk_errexit(SIGERR, "Invalid index type of %d.\n", what);
+      return;
+  }
+
+
+  for (i=0; i<nf; i++) {  /* count how many entries fall into each reverse row/column */
+    for (j=fptr[i]; j<fptr[i+1]; j++)
+      rptr[find[j]]++;
+  }
+  MAKECSR(i, nr, rptr);  /* NOTE(review): macro defined elsewhere; presumably turns the counts into start offsets -- confirm */
+  
+  if (rptr[nr] > 6*nr) {  /* NOTE(review): rationale for the 6*nr threshold is not visible here; this branch fills indices and values in separate passes */
+    for (i=0; i<nf; i++) {
+      for (j=fptr[i]; j<fptr[i+1]; j++) 
+        rind[rptr[find[j]]++] = i;  /* rptr[x] is advanced as the write cursor of reverse row/column x */
+    }
+    SHIFTCSR(i, nr, rptr);  /* NOTE(review): presumably restores the offsets advanced by the ++ above -- confirm */
+
+    if (fval) {
+      for (i=0; i<nf; i++) {
+        for (j=fptr[i]; j<fptr[i+1]; j++) 
+          rval[rptr[find[j]]++] = fval[j];
+      }
+      SHIFTCSR(i, nr, rptr);
+    }
+  }
+  else {  /* otherwise indices and values are written together in one pass */
+    if (fval) {
+      for (i=0; i<nf; i++) {
+        for (j=fptr[i]; j<fptr[i+1]; j++) {
+          k = find[j];
+          rind[rptr[k]]   = i;
+          rval[rptr[k]++] = fval[j];
+        }
+      }
+    }
+    else {
+      for (i=0; i<nf; i++) {
+        for (j=fptr[i]; j<fptr[i+1]; j++) 
+          rind[rptr[find[j]]++] = i;
+      }
+    }
+    SHIFTCSR(i, nr, rptr);
+  }
+}
+
+
+/*************************************************************************/
+/*! Normalizes the rows/columns of the matrix to be unit 
+    length.
+    \param mat the matrix itself,
+    \param what indicates what will be normalized and is obtained by
+           specifying GK_CSR_ROW, GK_CSR_COL, GK_CSR_ROW|GK_CSR_COL. 
+    \param norm indicates what norm is to normalize to, 1: 1-norm, 2: 2-norm
+*/
+/**************************************************************************/
+void gk_csr_Normalize(gk_csr_t *mat, int what, int norm)
+{
+  ssize_t i, j;
+  int n, view;
+  ssize_t *ptr;
+  float *val, sum;
+
+  /* The row-based and column-based views are normalized by the very same
+     procedure, so both are handled by one loop: view 0 is the row view and
+     view 1 is the column view. A view is skipped unless it was requested
+     via 'what' and actually stores values. */
+  for (view=0; view<2; view++) {
+    if (view == 0) {
+      if (!(what&GK_CSR_ROW && mat->rowval))
+        continue;
+
+      n   = mat->nrows;
+      ptr = mat->rowptr;
+      val = mat->rowval;
+    }
+    else {
+      if (!(what&GK_CSR_COL && mat->colval))
+        continue;
+
+      n   = mat->ncols;
+      ptr = mat->colptr;
+      val = mat->colval;
+    }
+
+    /* each vector is scaled independently of the others */
+    #pragma omp parallel for if (ptr[n] > OMPMINOPS) private(j,sum) schedule(static)
+    for (i=0; i<n; i++) {
+      /* 'sum' first accumulates the norm and then becomes the scaling
+         factor; it is left at 0 for any norm other than 1 or 2 */
+      sum = 0.0;
+      switch (norm) {
+        case 1:
+          for (j=ptr[i]; j<ptr[i+1]; j++) 
+            sum += val[j]; /* assume val[j] > 0 */ 
+          if (sum > 0)
+            sum = 1.0/sum;
+          break;
+
+        case 2:
+          for (j=ptr[i]; j<ptr[i+1]; j++) 
+            sum += val[j]*val[j];
+          if (sum > 0)
+            sum = 1.0/sqrt(sum); 
+          break;
+
+        default:
+          break;
+      }
+
+      for (j=ptr[i]; j<ptr[i+1]; j++)
+        val[j] *= sum;
+    }
+  }
+}
+
+
+/*************************************************************************/
+/*! Applies different row scaling methods.
+    \param mat the matrix itself,
+    \param type indicates the type of row scaling. Possible values are:
+           GK_CSR_MAXTF, GK_CSR_MAXTF2, GK_CSR_SQRT, GK_CSR_POW25, GK_CSR_POW65,
+           GK_CSR_POW75, GK_CSR_POW85, GK_CSR_LOG, GK_CSR_IDF, GK_CSR_IDF2.
+*/
+/**************************************************************************/
+void gk_csr_Scale(gk_csr_t *mat, int type)
+{
+  ssize_t i, j;
+  int nrows, ncols, nnzcols, bgfreq;
+  ssize_t *rowptr;
+  int *rowind, *collen;
+  float *rowval, *cscale, maxtf;
+  double logscale = 1.0/log(2.0);
+
+  nrows  = mat->nrows;
+  rowptr = mat->rowptr;
+  rowind = mat->rowind;
+  rowval = mat->rowval;
+
+  switch (type) {
+    case GK_CSR_MAXTF: /* TF' = .5 + .5*TF/MAX(TF) */
+      #pragma omp parallel for if (rowptr[nrows] > OMPMINOPS) private(j, maxtf) schedule(static)
+      for (i=0; i<nrows; i++) {
+        /* start the maximum at 0 rather than at the row's first value: an
+           empty row has no first value, and reading rowval[rowptr[i]] for
+           it would run past the end of the array for a trailing empty row */
+        maxtf = 0.0;
+        for (j=rowptr[i]; j<rowptr[i+1]; j++) 
+          maxtf = (maxtf < fabs(rowval[j]) ? fabs(rowval[j]) : maxtf);
+  
+        for (j=rowptr[i]; j<rowptr[i+1]; j++)
+          rowval[j] = .5 + .5*rowval[j]/maxtf;
+      }
+      break;
+
+    case GK_CSR_MAXTF2: /* TF' = .1 + .9*TF/MAX(TF) */
+      #pragma omp parallel for if (rowptr[nrows] > OMPMINOPS) private(j, maxtf) schedule(static)
+      for (i=0; i<nrows; i++) {
+        maxtf = 0.0; /* as in GK_CSR_MAXTF: never read the values of an empty row */
+        for (j=rowptr[i]; j<rowptr[i+1]; j++) 
+          maxtf = (maxtf < fabs(rowval[j]) ? fabs(rowval[j]) : maxtf);
+  
+        for (j=rowptr[i]; j<rowptr[i+1]; j++)
+          rowval[j] = .1 + .9*rowval[j]/maxtf;
+      }
+      break;
+
+    case GK_CSR_SQRT: /* TF' = .1+SQRT(TF) */
+      #pragma omp parallel for if (rowptr[nrows] > OMPMINOPS) private(j) schedule(static)
+      for (i=0; i<nrows; i++) {
+        for (j=rowptr[i]; j<rowptr[i+1]; j++) { 
+          if (rowval[j] != 0.0)
+            rowval[j] = .1+sign(rowval[j], sqrt(fabs(rowval[j])));
+        }
+      }
+      
+      break;
+
+    case GK_CSR_POW25: /* TF' = .1+POW(TF,.25) */
+      #pragma omp parallel for if (rowptr[nrows] > OMPMINOPS) private(j) schedule(static)
+      for (i=0; i<nrows; i++) {
+        for (j=rowptr[i]; j<rowptr[i+1]; j++) { 
+          if (rowval[j] != 0.0)
+            rowval[j] = .1+sign(rowval[j], sqrt(sqrt(fabs(rowval[j]))));
+        }
+      }
+      break;
+
+    case GK_CSR_POW65: /* TF' = .1+POW(TF,.65) */
+      #pragma omp parallel for if (rowptr[nrows] > OMPMINOPS) private(j) schedule(static)
+      for (i=0; i<nrows; i++) {
+        for (j=rowptr[i]; j<rowptr[i+1]; j++) { 
+          if (rowval[j] != 0.0)
+            rowval[j] = .1+sign(rowval[j], powf(fabs(rowval[j]), .65));
+        }
+      }
+      break;
+
+    case GK_CSR_POW75: /* TF' = .1+POW(TF,.75) */
+      #pragma omp parallel for if (rowptr[nrows] > OMPMINOPS) private(j) schedule(static)
+      for (i=0; i<nrows; i++) {
+        for (j=rowptr[i]; j<rowptr[i+1]; j++) { 
+          if (rowval[j] != 0.0)
+            rowval[j] = .1+sign(rowval[j], powf(fabs(rowval[j]), .75));
+        }
+      }
+      break;
+
+    case GK_CSR_POW85: /* TF' = .1+POW(TF,.85) */
+      #pragma omp parallel for if (rowptr[nrows] > OMPMINOPS) private(j) schedule(static)
+      for (i=0; i<nrows; i++) {
+        for (j=rowptr[i]; j<rowptr[i+1]; j++) { 
+          if (rowval[j] != 0.0)
+            rowval[j] = .1+sign(rowval[j], powf(fabs(rowval[j]), .85));
+        }
+      }
+      break;
+
+    case GK_CSR_LOG: /* TF' = 1+log_2(TF) */
+      #pragma omp parallel for if (rowptr[nrows] > OMPMINOPS) schedule(static,32)
+      for (i=0; i<rowptr[nrows]; i++) {
+        if (rowval[i] != 0.0)
+          rowval[i] = 1+(rowval[i]>0.0 ? log(rowval[i]) : -log(-rowval[i]))*logscale;
+      }
+      break;
+
+    case GK_CSR_IDF: /* TF' = TF*IDF */
+      ncols  = mat->ncols;
+      cscale = gk_fmalloc(ncols, "gk_csr_Scale: cscale");
+      collen = gk_ismalloc(ncols, 0, "gk_csr_Scale: collen");
+
+      /* collen[c] = number of entries found in column c */
+      for (i=0; i<nrows; i++) {
+        for (j=rowptr[i]; j<rowptr[i+1]; j++)
+          collen[rowind[j]]++;
+      }
+
+      /* columns that never appear get a scale of 0 */
+      #pragma omp parallel for if (ncols > OMPMINOPS) schedule(static)
+      for (i=0; i<ncols; i++)
+        cscale[i] = (collen[i] > 0 ? log(1.0*nrows/collen[i]) : 0.0);
+
+      #pragma omp parallel for if (rowptr[nrows] > OMPMINOPS) private(j) schedule(static)
+      for (i=0; i<nrows; i++) {
+        for (j=rowptr[i]; j<rowptr[i+1]; j++)
+          rowval[j] *= cscale[rowind[j]];
+      }
+      
+      gk_free((void **)&cscale, &collen, LTERM);
+      break;
+
+    case GK_CSR_IDF2: /* TF' = TF*IDF */
+      ncols  = mat->ncols;
+      cscale = gk_fmalloc(ncols, "gk_csr_Scale: cscale");
+      collen = gk_ismalloc(ncols, 0, "gk_csr_Scale: collen");
+
+      /* collen[c] = number of entries found in column c */
+      for (i=0; i<nrows; i++) {
+        for (j=rowptr[i]; j<rowptr[i+1]; j++)
+          collen[rowind[j]]++;
+      }
+
+      /* nnzcols = number of columns that appear at least once */
+      nnzcols = 0;
+      #pragma omp parallel for if (ncols > OMPMINOPS) schedule(static) reduction(+:nnzcols)
+      for (i=0; i<ncols; i++)
+        nnzcols += (collen[i] > 0 ? 1 : 0);
+
+      /* bgfreq is half the average number of entries per used column, but at
+         least 10; without any used column the quotient is undefined, so skip it */
+      bgfreq = gk_max(10, (nnzcols > 0 ? (ssize_t)(.5*rowptr[nrows]/nnzcols) : 0));
+      printf("nnz: %zd, nnzcols: %d, bgfreq: %d\n", rowptr[nrows], nnzcols, bgfreq);
+
+      #pragma omp parallel for if (ncols > OMPMINOPS) schedule(static)
+      for (i=0; i<ncols; i++)
+        cscale[i] = (collen[i] > 0 ? log(1.0*(nrows+2*bgfreq)/(bgfreq+collen[i])) : 0.0);
+
+      #pragma omp parallel for if (rowptr[nrows] > OMPMINOPS) private(j) schedule(static)
+      for (i=0; i<nrows; i++) {
+        for (j=rowptr[i]; j<rowptr[i+1]; j++)
+          rowval[j] *= cscale[rowind[j]];
+      }
+
+      gk_free((void **)&cscale, &collen, LTERM);
+      break;
+
+    default:
+      gk_errexit(SIGERR, "Unknown scaling type of %d\n", type);
+  }
+}
+
+
+/*************************************************************************/
+/*! Computes the sums of the rows/columns
+    \param mat the matrix itself,
+    \param what is either GK_CSR_ROW or GK_CSR_COL indicating which 
+           sums to compute.
+*/
+/**************************************************************************/
+void gk_csr_ComputeSums(gk_csr_t *mat, int what)
+{
+  ssize_t k;
+  int n;
+  ssize_t *ptr;
+  float *val, *sums, **slot;
+
+  /* pick the view to summarize and the matrix field that receives the sums */
+  if (what == GK_CSR_ROW) {
+    n    = mat->nrows;
+    ptr  = mat->rowptr;
+    val  = mat->rowval;
+    slot = &mat->rsums;
+  }
+  else if (what == GK_CSR_COL) {
+    n    = mat->ncols;
+    ptr  = mat->colptr;
+    val  = mat->colval;
+    slot = &mat->csums;
+  }
+  else {
+    gk_errexit(SIGERR, "Invalid sum type of %d.\n", what);
+    return;
+  }
+
+  /* sums left over from an earlier call are released before the new array
+     is allocated and stored in the matrix */
+  if (*slot) 
+    gk_free((void **)slot, LTERM);
+  sums = *slot = gk_fsmalloc(n, 0, "gk_csr_ComputeSums: sums");
+
+  if (val == NULL) {
+    /* no values stored: each sum is the number of entries */
+    #pragma omp parallel for if (ptr[n] > OMPMINOPS) schedule(static)
+    for (k=0; k<n; k++) 
+      sums[k] = ptr[k+1]-ptr[k];
+  }
+  else {
+    /* values stored: gk_fsum is applied to the value slice of each row/column */
+    #pragma omp parallel for if (ptr[n] > OMPMINOPS) schedule(static)
+    for (k=0; k<n; k++) 
+      sums[k] = gk_fsum(ptr[k+1]-ptr[k], val+ptr[k], 1);
+  }
+}
+
+
+/*************************************************************************/
+/*! Computes the norms of the rows/columns
+
+    \param mat the matrix itself,
+    \param what is either GK_CSR_ROW or GK_CSR_COL indicating which 
+           norms to compute; they are stored in mat->rnorms or mat->cnorms.
+
+    \note If the rowval/colval arrays are NULL, the matrix is assumed
+          to be binary and the norms are computed accordingly.
+*/
+/**************************************************************************/
+void gk_csr_ComputeNorms(gk_csr_t *mat, int what)
+{
+  ssize_t i;
+  int n;
+  ssize_t *ptr;
+  float *val, *norms;
+
+  switch (what) {
+    case GK_CSR_ROW:
+      n   = mat->nrows;
+      ptr = mat->rowptr;
+      val = mat->rowval;
+      /* norms left over from an earlier call are released first */
+      if (mat->rnorms) gk_free((void **)&mat->rnorms, LTERM);
+
+      norms = mat->rnorms = gk_fsmalloc(n, 0, "gk_csr_ComputeNorms: norms");
+      break;
+    case GK_CSR_COL:
+      n   = mat->ncols;
+      ptr = mat->colptr;
+      val = mat->colval;
+      /* norms left over from an earlier call are released first */
+      if (mat->cnorms) gk_free((void **)&mat->cnorms, LTERM);
+
+      norms = mat->cnorms = gk_fsmalloc(n, 0, "gk_csr_ComputeNorms: norms");
+      break;
+    default:
+      gk_errexit(SIGERR, "Invalid norm type of %d.\n", what);
+      return;
+  }
+
+  if (val) {
+    #pragma omp parallel for if (ptr[n] > OMPMINOPS) schedule(static)
+    for (i=0; i<n; i++) 
+      norms[i] = sqrt(gk_fdot(ptr[i+1]-ptr[i], val+ptr[i], 1, val+ptr[i], 1));
+  }
+  else {  /* binary matrix: the norm is the square root of the entry count */
+    #pragma omp parallel for if (ptr[n] > OMPMINOPS) schedule(static)
+    for (i=0; i<n; i++) 
+      norms[i] = sqrt(ptr[i+1]-ptr[i]);
+  }
+}
+
+
+/*************************************************************************/
+/*! Computes the squared norms of the rows/columns
+
+    \param mat the matrix itself,
+    \param what is either GK_CSR_ROW or GK_CSR_COL indicating which 
+           squared norms to compute.
+
+    \note If the rowval/colval arrays are NULL, the matrix is assumed
+          to be binary and the norms are computed accordingly.
+*/
+/**************************************************************************/
+void gk_csr_ComputeSquaredNorms(gk_csr_t *mat, int what)
+{
+  ssize_t i;
+  int n;
+  ssize_t *ptr;
+  float *val, *norms;
+
+  switch (what) {
+    case GK_CSR_ROW:
+      n   = mat->nrows;
+      ptr = mat->rowptr;
+      val = mat->rowval;
+
+      if (mat->rnorms) gk_free((void **)&mat->rnorms, LTERM);
+      /* the fresh array replaces mat->rnorms; the label names this routine */
+      norms = mat->rnorms = gk_fsmalloc(n, 0, "gk_csr_ComputeSquaredNorms: norms");
+      break;
+    case GK_CSR_COL:
+      n   = mat->ncols;
+      ptr = mat->colptr;
+      val = mat->colval;
+
+      if (mat->cnorms) gk_free((void **)&mat->cnorms, LTERM);
+      /* the fresh array replaces mat->cnorms; the label names this routine */
+      norms = mat->cnorms = gk_fsmalloc(n, 0, "gk_csr_ComputeSquaredNorms: norms");
+      break;
+    default:
+      gk_errexit(SIGERR, "Invalid norm type of %d.\n", what);
+      return;
+  }
+  /* norms[i] = sum of squares of the i-th row/column (no sqrt is taken here) */
+  if (val) {
+    #pragma omp parallel for if (ptr[n] > OMPMINOPS) schedule(static)
+    for (i=0; i<n; i++) 
+      norms[i] = gk_fdot(ptr[i+1]-ptr[i], val+ptr[i], 1, val+ptr[i], 1);
+  }
+  else {
+    #pragma omp parallel for if (ptr[n] > OMPMINOPS) schedule(static)
+    for (i=0; i<n; i++) 
+      norms[i] = ptr[i+1]-ptr[i];  /* no values: every stored entry counts as 1 */
+  }
+}
+
+
+/*************************************************************************/
+/*! Returns a new matrix whose rows/columns are shuffled.
+   
+    \param mat the matrix to be shuffled,
+    \param what indicates if the rows (GK_CSR_ROW), columns (GK_CSR_COL),
+           or both (GK_CSR_ROWCOL) will be shuffled,
+    \param symmetric indicates if the same shuffling will be applied to 
+           both rows and columns. This is valid with nrows==ncols and 
+           GK_CSR_ROWCOL was specified.
+    \returns the shuffled matrix. 
+          The input matrix is not modified. 
+*/
+/**************************************************************************/
+gk_csr_t *gk_csr_Shuffle(gk_csr_t *mat, int what, int symmetric)
+{
+  ssize_t i, j;
+  int nrows, ncols;
+  ssize_t *rowptr, *nrowptr;
+  int *rowind, *nrowind;
+  int *rperm, *cperm;
+  float *rowval, *nrowval;
+  gk_csr_t *nmat;
+
+  if (what == GK_CSR_ROWCOL && symmetric && mat->nrows != mat->ncols) {
+    gk_errexit(SIGERR, "The matrix is not square for a symmetric rowcol shuffling.\n");
+    return NULL;  /* never copy an nrows-long rperm into an ncols-long cperm below */
+  }
+
+  nrows  = mat->nrows;
+  ncols  = mat->ncols;
+  rowptr = mat->rowptr;
+  rowind = mat->rowind;
+  rowval = mat->rowval;
+
+  rperm = gk_imalloc(nrows, "gk_csr_Shuffle: rperm");
+  cperm = gk_imalloc(ncols, "gk_csr_Shuffle: cperm");
+  /* rperm[i] / cperm[i] is the new position of old row / column i */
+  switch (what) {
+    case GK_CSR_ROW:
+      gk_RandomPermute(nrows, rperm, 1);
+      for (i=0; i<20; i++)
+        gk_RandomPermute(nrows, rperm, 0);
+
+      for (i=0; i<ncols; i++)
+        cperm[i] = i;  /* columns keep their positions */
+      break;
+
+    case GK_CSR_COL:
+      gk_RandomPermute(ncols, cperm, 1);
+      for (i=0; i<20; i++)
+        gk_RandomPermute(ncols, cperm, 0);
+
+      for (i=0; i<nrows; i++)
+        rperm[i] = i;  /* rows keep their positions */
+      break;
+
+    case GK_CSR_ROWCOL:
+      gk_RandomPermute(nrows, rperm, 1);
+      for (i=0; i<20; i++)
+        gk_RandomPermute(nrows, rperm, 0);
+
+      if (symmetric)
+        gk_icopy(nrows, rperm, cperm);  /* nrows == ncols is guaranteed by the check above */
+      else {
+        gk_RandomPermute(ncols, cperm, 1);
+        for (i=0; i<20; i++)
+          gk_RandomPermute(ncols, cperm, 0);
+      }
+      break;
+
+    default:
+      gk_free((void **)&rperm, &cperm, LTERM);
+      gk_errexit(SIGERR, "Unknown shuffling type of %d\n", what);
+      return NULL;
+  }
+
+  nmat = gk_csr_Create();
+  nmat->nrows = nrows;
+  nmat->ncols = ncols;
+
+  nrowptr = nmat->rowptr = gk_zmalloc(nrows+1, "gk_csr_Shuffle: nrowptr");
+  nrowind = nmat->rowind = gk_imalloc(rowptr[nrows], "gk_csr_Shuffle: nrowind");
+  nrowval = nmat->rowval = (rowval ? gk_fmalloc(rowptr[nrows], "gk_csr_Shuffle: nrowval") : NULL) ;
+  /* store each old row's length at its new position (MAKECSR presumably turns counts into offsets) */
+  for (i=0; i<nrows; i++)
+    nrowptr[rperm[i]] = rowptr[i+1]-rowptr[i];
+  MAKECSR(i, nrows, nrowptr);
+  /* scatter the entries; nrowptr[] is advanced as a write cursor and repaired by SHIFTCSR */
+  for (i=0; i<nrows; i++) {
+    for (j=rowptr[i]; j<rowptr[i+1]; j++) {
+      nrowind[nrowptr[rperm[i]]] = cperm[rowind[j]];
+      if (nrowval)
+        nrowval[nrowptr[rperm[i]]] = rowval[j];
+      nrowptr[rperm[i]]++;
+    }
+  }
+  SHIFTCSR(i, nrows, nrowptr);
+
+  gk_free((void **)&rperm, &cperm, LTERM);
+  return nmat;
+}
+
+
+/*************************************************************************/
+/*! Returns the transpose of the matrix.
+   
+    \param mat the matrix to be transposed,
+    \returns the transposed matrix. 
+          The input matrix is not modified. 
+*/
+/**************************************************************************/
+gk_csr_t *gk_csr_Transpose(gk_csr_t *mat)
+{
+  /* holders for mat's current column view while a second one is built */
+  ssize_t *colptr;
+  int32_t *colind;
+  float *colval;
+  gk_csr_t *nmat;
+
+  colptr = mat->colptr;
+  colind = mat->colind;
+  colval = mat->colval;
+  /* NOTE(review): assumes gk_csr_CreateIndex allocates new col* arrays when these are NULL */
+  mat->colptr = NULL;
+  mat->colind = NULL;
+  mat->colval = NULL;
+
+  gk_csr_CreateIndex(mat, GK_CSR_COL);
+  /* the column view just built becomes the row view of the transposed matrix */
+  nmat = gk_csr_Create();
+  nmat->nrows  = mat->ncols;
+  nmat->ncols  = mat->nrows;
+  nmat->rowptr = mat->colptr;
+  nmat->rowind = mat->colind;
+  nmat->rowval = mat->colval;
+  /* put back the caller's original column view (mat is only touched temporarily) */
+  mat->colptr = colptr;
+  mat->colind = colind;
+  mat->colval = colval;
+
+  return nmat;
+
+}
+
+
+/*************************************************************************/
+/*! Computes the similarity between two rows/columns
+
+    \param mat the matrix itself. The routine assumes that the indices
+           are sorted in increasing order.
+    \param i1 is the first row/column,
+    \param i2 is the second row/column,
+    \param what is either GK_CSR_ROW or GK_CSR_COL indicating the type of
+           objects between which the similarity will be computed,
+    \param simtype is the type of similarity and is one of GK_CSR_COS,
+           GK_CSR_JAC, GK_CSR_MIN, GK_CSR_AMIN
+    \returns the similarity between the two rows/columns.
+*/
+/**************************************************************************/
+float gk_csr_ComputeSimilarity(gk_csr_t *mat, int i1, int i2, int what, 
+          int simtype)
+{
+  int nind1, nind2;
+  int *ind1, *ind2;
+  float *val1, *val2, stat1, stat2, sim;
+
+  switch (what) {
+    case GK_CSR_ROW:
+      if (!mat->rowptr) {
+        gk_errexit(SIGERR, "Row-based view of the matrix does not exists.\n");
+        return 0.0;  /* do not touch the missing view if gk_errexit returns */
+      }
+      nind1 = mat->rowptr[i1+1]-mat->rowptr[i1];
+      nind2 = mat->rowptr[i2+1]-mat->rowptr[i2];
+      ind1  = mat->rowind + mat->rowptr[i1];
+      ind2  = mat->rowind + mat->rowptr[i2];
+      val1  = mat->rowval + mat->rowptr[i1];
+      val2  = mat->rowval + mat->rowptr[i2];
+      break;
+
+    case GK_CSR_COL:
+      if (!mat->colptr) {
+        gk_errexit(SIGERR, "Column-based view of the matrix does not exists.\n");
+        return 0.0;  /* do not touch the missing view if gk_errexit returns */
+      }
+      nind1 = mat->colptr[i1+1]-mat->colptr[i1];
+      nind2 = mat->colptr[i2+1]-mat->colptr[i2];
+      ind1  = mat->colind + mat->colptr[i1];
+      ind2  = mat->colind + mat->colptr[i2];
+      val1  = mat->colval + mat->colptr[i1];
+      val2  = mat->colval + mat->colptr[i2];
+      break;
+
+    default:
+      gk_errexit(SIGERR, "Invalid index type of %d.\n", what);
+      return 0.0;
+  }
+
+  /* i1/i2 are reused as cursors; loops run until BOTH lists are consumed (||) so tails count */
+  switch (simtype) {
+    case GK_CSR_COS:
+    case GK_CSR_JAC:
+      sim = stat1 = stat2 = 0.0;
+      i1 = i2 = 0;
+      while (i1<nind1 || i2<nind2) {
+        if (i1 == nind1) {  /* list 1 exhausted: drain list 2 */
+          stat2 += val2[i2]*val2[i2];
+          i2++;
+        }
+        else if (i2 == nind2) {  /* list 2 exhausted: drain list 1 */
+          stat1 += val1[i1]*val1[i1];
+          i1++;
+        }
+        else if (ind1[i1] < ind2[i2]) {
+          stat1 += val1[i1]*val1[i1];
+          i1++;
+        }
+        else if (ind1[i1] > ind2[i2]) {
+          stat2 += val2[i2]*val2[i2];
+          i2++;
+        }
+        else {  /* index present in both: contributes to the dot product */
+          sim   += val1[i1]*val2[i2];
+          stat1 += val1[i1]*val1[i1];
+          stat2 += val2[i2]*val2[i2];
+          i1++;
+          i2++;
+        }
+      }
+      if (simtype == GK_CSR_COS)
+        sim = (stat1*stat2 > 0.0 ? sim/sqrt(stat1*stat2) : 0.0);
+      else 
+        sim = (stat1+stat2-sim > 0.0 ? sim/(stat1+stat2-sim) : 0.0);
+      break;
+
+    case GK_CSR_MIN:
+      sim = stat1 = stat2 = 0.0;
+      i1 = i2 = 0;
+      while (i1<nind1 || i2<nind2) {
+        if (i1 == nind1) {
+          stat2 += val2[i2];
+          i2++;
+        }
+        else if (i2 == nind2) {
+          stat1 += val1[i1];
+          i1++;
+        }
+        else if (ind1[i1] < ind2[i2]) {
+          stat1 += val1[i1];
+          i1++;
+        }
+        else if (ind1[i1] > ind2[i2]) {
+          stat2 += val2[i2];
+          i2++;
+        }
+        else {
+          sim   += gk_min(val1[i1],val2[i2]);
+          stat1 += val1[i1];
+          stat2 += val2[i2];
+          i1++;
+          i2++;
+        }
+      }
+      sim = (stat1+stat2-sim > 0.0 ? sim/(stat1+stat2-sim) : 0.0);
+      break;
+
+    case GK_CSR_AMIN:
+      sim = stat1 = stat2 = 0.0;
+      i1 = i2 = 0;
+      while (i1<nind1 || i2<nind2) {
+        if (i1 == nind1) {
+          stat2 += val2[i2];
+          i2++;
+        }
+        else if (i2 == nind2) {
+          stat1 += val1[i1];
+          i1++;
+        }
+        else if (ind1[i1] < ind2[i2]) {
+          stat1 += val1[i1];
+          i1++;
+        }
+        else if (ind1[i1] > ind2[i2]) {
+          stat2 += val2[i2];
+          i2++;
+        }
+        else {
+          sim   += gk_min(val1[i1],val2[i2]);
+          stat1 += val1[i1];
+          stat2 += val2[i2];
+          i1++;
+          i2++;
+        }
+      }
+      sim = (stat1 > 0.0 ? sim/stat1 : 0.0);  /* asymmetric: normalized by the first object only */
+      break;
+
+    default:
+      gk_errexit(SIGERR, "Unknown similarity measure %d\n", simtype);
+      return -1;
+  }
+  return sim;
+}
+
+
+/*************************************************************************/
+/*! Computes the similarity between a row/column of mat_a and one of mat_b
+
+    \param mat_a the first matrix. The routine assumes that the indices
+           are sorted in increasing order.
+    \param mat_b the second matrix. The routine assumes that the indices
+           are sorted in increasing order.
+    \param i1 is the row/column from the first matrix (mat_a),
+    \param i2 is the row/column from the second matrix (mat_b),
+    \param what is either GK_CSR_ROW or GK_CSR_COL indicating the type of
+           objects between which the similarity will be computed,
+    \param simtype is the type of similarity and is one of GK_CSR_COS,
+           GK_CSR_JAC, GK_CSR_MIN, GK_CSR_AMIN
+    \returns the similarity between the two rows/columns.
+*/
+/**************************************************************************/
+float gk_csr_ComputePairSimilarity(gk_csr_t *mat_a, gk_csr_t *mat_b, 
+          int i1, int i2, int what, int simtype)
+{
+  int nind1, nind2;
+  int *ind1, *ind2;
+  float *val1, *val2, stat1, stat2, sim;
+
+  switch (what) {
+    case GK_CSR_ROW:
+      if (!mat_a->rowptr || !mat_b->rowptr) {
+        gk_errexit(SIGERR, "Row-based view of the matrix does not exists.\n");
+        return 0.0;  /* do not touch the missing view if gk_errexit returns */
+      }
+      nind1 = mat_a->rowptr[i1+1]-mat_a->rowptr[i1];
+      nind2 = mat_b->rowptr[i2+1]-mat_b->rowptr[i2];
+      ind1  = mat_a->rowind + mat_a->rowptr[i1];
+      ind2  = mat_b->rowind + mat_b->rowptr[i2];
+      val1  = mat_a->rowval + mat_a->rowptr[i1];
+      val2  = mat_b->rowval + mat_b->rowptr[i2];
+      break;
+
+    case GK_CSR_COL:
+      if (!mat_a->colptr || !mat_b->colptr) {
+        gk_errexit(SIGERR, "Column-based view of the matrix does not exists.\n");
+        return 0.0;  /* do not touch the missing view if gk_errexit returns */
+      }
+      nind1 = mat_a->colptr[i1+1]-mat_a->colptr[i1];
+      nind2 = mat_b->colptr[i2+1]-mat_b->colptr[i2];
+      ind1  = mat_a->colind + mat_a->colptr[i1];
+      ind2  = mat_b->colind + mat_b->colptr[i2];
+      val1  = mat_a->colval + mat_a->colptr[i1];
+      val2  = mat_b->colval + mat_b->colptr[i2];
+      break;
+
+    default:
+      gk_errexit(SIGERR, "Invalid index type of %d.\n", what);
+      return 0.0;
+  }
+
+  /* i1/i2 are reused as cursors; loops run until BOTH lists are consumed (||) so tails count */
+  switch (simtype) {
+    case GK_CSR_COS:
+    case GK_CSR_JAC:
+      sim = stat1 = stat2 = 0.0;
+      i1 = i2 = 0;
+      while (i1<nind1 || i2<nind2) {
+        if (i1 == nind1) {  /* list 1 exhausted: drain list 2 */
+          stat2 += val2[i2]*val2[i2];
+          i2++;
+        }
+        else if (i2 == nind2) {  /* list 2 exhausted: drain list 1 */
+          stat1 += val1[i1]*val1[i1];
+          i1++;
+        }
+        else if (ind1[i1] < ind2[i2]) {
+          stat1 += val1[i1]*val1[i1];
+          i1++;
+        }
+        else if (ind1[i1] > ind2[i2]) {
+          stat2 += val2[i2]*val2[i2];
+          i2++;
+        }
+        else {  /* index present in both: contributes to the dot product */
+          sim   += val1[i1]*val2[i2];
+          stat1 += val1[i1]*val1[i1];
+          stat2 += val2[i2]*val2[i2];
+          i1++;
+          i2++;
+        }
+      }
+      if (simtype == GK_CSR_COS)
+        sim = (stat1*stat2 > 0.0 ? sim/sqrt(stat1*stat2) : 0.0);
+      else 
+        sim = (stat1+stat2-sim > 0.0 ? sim/(stat1+stat2-sim) : 0.0);
+      break;
+
+    case GK_CSR_MIN:
+      sim = stat1 = stat2 = 0.0;
+      i1 = i2 = 0;
+      while (i1<nind1 || i2<nind2) {
+        if (i1 == nind1) {
+          stat2 += val2[i2];
+          i2++;
+        }
+        else if (i2 == nind2) {
+          stat1 += val1[i1];
+          i1++;
+        }
+        else if (ind1[i1] < ind2[i2]) {
+          stat1 += val1[i1];
+          i1++;
+        }
+        else if (ind1[i1] > ind2[i2]) {
+          stat2 += val2[i2];
+          i2++;
+        }
+        else {
+          sim   += gk_min(val1[i1],val2[i2]);
+          stat1 += val1[i1];
+          stat2 += val2[i2];
+          i1++;
+          i2++;
+        }
+      }
+      sim = (stat1+stat2-sim > 0.0 ? sim/(stat1+stat2-sim) : 0.0);
+      break;
+
+    case GK_CSR_AMIN:
+      sim = stat1 = stat2 = 0.0;
+      i1 = i2 = 0;
+      while (i1<nind1 || i2<nind2) {
+        if (i1 == nind1) {
+          stat2 += val2[i2];
+          i2++;
+        }
+        else if (i2 == nind2) {
+          stat1 += val1[i1];
+          i1++;
+        }
+        else if (ind1[i1] < ind2[i2]) {
+          stat1 += val1[i1];
+          i1++;
+        }
+        else if (ind1[i1] > ind2[i2]) {
+          stat2 += val2[i2];
+          i2++;
+        }
+        else {
+          sim   += gk_min(val1[i1],val2[i2]);
+          stat1 += val1[i1];
+          stat2 += val2[i2];
+          i1++;
+          i2++;
+        }
+      }
+      sim = (stat1 > 0.0 ? sim/stat1 : 0.0);  /* asymmetric: normalized by the mat_a object only */
+      break;
+
+    default:
+      gk_errexit(SIGERR, "Unknown similarity measure %d\n", simtype);
+      return -1;
+  }
+  return sim;
+}
+
+/*************************************************************************/
+/*! Finds the n most similar rows (neighbors) to the query.
+
+    \param mat the matrix itself
+    \param nqterms is the number of columns in the query
+    \param qind is the list of query columns
+    \param qval is the list of corresponding query weights
+    \param simtype is the type of similarity and is one of GK_CSR_DOTP,
+           GK_CSR_COS, GK_CSR_JAC, GK_CSR_MIN, GK_CSR_AMIN. In case of 
+           GK_CSR_COS, the rows and the query are assumed to be of unit 
+           length.
+    \param nsim is the maximum number of requested most similar rows.
+           If -1 is provided, then everything is returned unsorted.
+    \param minsim is the minimum similarity of the requested most 
+           similar rows
+    \param hits is the result set. This array should be at least
+           of length nsim, or of length nrows when nsim is -1.
+    \param i_marker is an array of size equal to the number of rows
+           whose values are initialized to -1. If NULL is provided
+           then this array is allocated and freed internally.
+    \param i_cand is an array of size equal to the number of rows.
+           If NULL is provided then this array is allocated and freed 
+           internally.
+    \returns The number of identified most similar rows, which can be
+             smaller than the requested nsim when there are not
+             enough sufficiently similar rows.
+*/
+/**************************************************************************/
+int gk_csr_GetSimilarRows(gk_csr_t *mat, int nqterms, int *qind, 
+        float *qval, int simtype, int nsim, float minsim, gk_fkv_t *hits, 
+        int *i_marker, gk_fkv_t *i_cand)
+{
+  ssize_t i, ii, j, k;
+  int nrows, ncols, ncand;
+  ssize_t *colptr;
+  int *colind, *marker;
+  float *colval, *rnorms, mynorm, *rsums, mysum;
+  gk_fkv_t *cand;
+
+  if (nqterms == 0)
+    return 0;
+
+  nrows  = mat->nrows;
+  ncols  = mat->ncols;
+  GKASSERT((colptr = mat->colptr) != NULL);
+  GKASSERT((colind = mat->colind) != NULL);
+  GKASSERT((colval = mat->colval) != NULL);
+
+  marker = (i_marker ? i_marker : gk_ismalloc(nrows, -1, "gk_csr_SimilarRows: marker"));
+  cand   = (i_cand   ? i_cand   : gk_fkvmalloc(nrows, "gk_csr_SimilarRows: cand"));
+
+  switch (simtype) {
+    case GK_CSR_DOTP:
+    case GK_CSR_COS:
+      for (ncand=0, ii=0; ii<nqterms; ii++) {
+        i = qind[ii];
+        if (i >= 0 && i < ncols) {  /* query terms outside [0, ncols) are skipped */
+          for (j=colptr[i]; j<colptr[i+1]; j++) {
+            k = colind[j];
+            if (marker[k] == -1) {  /* first hit on row k: open a candidate slot */
+              cand[ncand].val = k;
+              cand[ncand].key = 0;
+              marker[k]       = ncand++;
+            }
+            cand[marker[k]].key += colval[j]*qval[ii];
+          }
+        }
+      }
+      break;
+
+    case GK_CSR_JAC:
+      for (ncand=0, ii=0; ii<nqterms; ii++) {
+        i = qind[ii];
+        if (i >= 0 && i < ncols) {
+          for (j=colptr[i]; j<colptr[i+1]; j++) {
+            k = colind[j];
+            if (marker[k] == -1) {
+              cand[ncand].val = k;
+              cand[ncand].key = 0;
+              marker[k]       = ncand++;
+            }
+            cand[marker[k]].key += colval[j]*qval[ii];
+          }
+        }
+      }
+
+      GKASSERT((rnorms = mat->rnorms) != NULL);
+      mynorm = gk_fdot(nqterms, qval, 1, qval, 1);  /* squared norm of the query */
+
+      for (i=0; i<ncand; i++)  /* NOTE(review): presumably expects rnorms to hold squared norms */
+        cand[i].key = cand[i].key/(rnorms[cand[i].val]+mynorm-cand[i].key);
+      break;
+
+    case GK_CSR_MIN:
+      for (ncand=0, ii=0; ii<nqterms; ii++) {
+        i = qind[ii];
+        if (i >= 0 && i < ncols) {
+          for (j=colptr[i]; j<colptr[i+1]; j++) {
+            k = colind[j];
+            if (marker[k] == -1) {
+              cand[ncand].val = k;
+              cand[ncand].key = 0;
+              marker[k]       = ncand++;
+            }
+            cand[marker[k]].key += gk_min(colval[j], qval[ii]);
+          }
+        }
+      }
+
+      GKASSERT((rsums = mat->rsums) != NULL);
+      mysum = gk_fsum(nqterms, qval, 1);
+
+      for (i=0; i<ncand; i++)
+        cand[i].key = cand[i].key/(rsums[cand[i].val]+mysum-cand[i].key);
+      break;
+
+    /* Asymmetric MIN similarity */
+    case GK_CSR_AMIN:
+      for (ncand=0, ii=0; ii<nqterms; ii++) {
+        i = qind[ii];
+        if (i >= 0 && i < ncols) {
+          for (j=colptr[i]; j<colptr[i+1]; j++) {
+            k = colind[j];
+            if (marker[k] == -1) {
+              cand[ncand].val = k;
+              cand[ncand].key = 0;
+              marker[k]       = ncand++;
+            }
+            cand[marker[k]].key += gk_min(colval[j], qval[ii]);
+          }
+        }
+      }
+
+      mysum = gk_fsum(nqterms, qval, 1);
+
+      for (i=0; i<ncand; i++)
+        cand[i].key = cand[i].key/mysum;
+      break;
+
+    default:  /* release the scratch arrays this call allocated before reporting */
+      if (i_marker == NULL) gk_free((void **)&marker, LTERM);
+      if (i_cand == NULL) gk_free((void **)&cand, LTERM);
+      gk_errexit(SIGERR, "Unknown similarity measure %d\n", simtype);
+      return -1;
+  }
+
+  /* prune the hits that are below minsim and reset marker[] back to -1 */
+  for (j=0, i=0; i<ncand; i++) {
+    marker[cand[i].val] = -1;
+    if (cand[i].key >= minsim) 
+      cand[j++] = cand[i];
+  }
+  ncand = j;
+
+  if (nsim == -1 || nsim >= ncand) {
+    nsim = ncand;  /* everything that survived is returned, unsorted */
+  }
+  else {
+    gk_dfkvkselect(ncand, nsim, cand);
+    gk_fkvsortd(nsim, cand);
+  }
+
+  gk_fkvcopy(nsim, cand, hits);
+  if (i_marker == NULL)
+    gk_free((void **)&marker, LTERM);
+  if (i_cand == NULL)
+    gk_free((void **)&cand, LTERM);
+
+  return nsim;
+}
+
+
+/*************************************************************************/
+/*! Returns a symmetric version of a square matrix. The symmetric version
+    is constructed by applying an A op A^T operation, where op is one of
+    GK_CSR_SYM_SUM, GK_CSR_SYM_MIN, GK_CSR_SYM_MAX, GK_CSR_SYM_AVG.
+   
+    \param mat the matrix to be symmetrized,
+    \param op indicates the operation to be performed. The possible values are
+           GK_CSR_SYM_SUM, GK_CSR_SYM_MIN, GK_CSR_SYM_MAX, and GK_CSR_SYM_AVG.
+
+    \returns the symmetrized matrix consisting only of its row-based structure. 
+          The input matrix is not modified. 
+*/
+/**************************************************************************/
+gk_csr_t *gk_csr_MakeSymmetric(gk_csr_t *mat, int op)
+{
+  ssize_t i, j, nnz;
+  int nrows, nadj, hasvals;
+  ssize_t *rowptr, *colptr, *nrowptr;
+  int *rowind, *colind, *nrowind, *marker, *ids;
+  float *rowval=NULL, *colval=NULL, *nrowval=NULL, *wgts=NULL;
+  gk_csr_t *nmat;
+
+  if (mat->nrows != mat->ncols) {
+    fprintf(stderr, "gk_csr_MakeSymmetric: The matrix needs to be square.\n");
+    return NULL;
+  }
+
+  hasvals = (mat->rowval != NULL);
+
+  nrows  = mat->nrows;
+  rowptr = mat->rowptr;
+  rowind = mat->rowind;
+  if (hasvals)
+    rowval = mat->rowval;
+
+  /* create a private column view (freed below) for efficient processing */
+  colptr = gk_zsmalloc(nrows+1, 0, "colptr");
+  colind = gk_i32malloc(rowptr[nrows], "colind");
+  if (hasvals)
+    colval = gk_fmalloc(rowptr[nrows], "colval");
+
+  for (i=0; i<nrows; i++) {
+    for (j=rowptr[i]; j<rowptr[i+1]; j++) 
+      colptr[rowind[j]]++;
+  }
+  MAKECSR(i, nrows, colptr);
+
+  for (i=0; i<nrows; i++) {
+    for (j=rowptr[i]; j<rowptr[i+1]; j++) {
+      colind[colptr[rowind[j]]] = i;
+      if (hasvals)
+        colval[colptr[rowind[j]]] = rowval[j];
+      colptr[rowind[j]]++;
+    }
+  }
+  SHIFTCSR(i, nrows, colptr);
+
+
+  nmat = gk_csr_Create();
+  
+  nmat->nrows = mat->nrows;
+  nmat->ncols = mat->ncols;
+  /* a row of the result holds at most its out-edges plus its in-edges: 2*nnz overall */
+  nrowptr = nmat->rowptr = gk_zmalloc(nrows+1, "gk_csr_MakeSymmetric: nrowptr");
+  nrowind = nmat->rowind = gk_imalloc(2*rowptr[nrows], "gk_csr_MakeSymmetric: nrowind");
+  if (hasvals)
+    nrowval = nmat->rowval = gk_fmalloc(2*rowptr[nrows], "gk_csr_MakeSymmetric: nrowval");
+  /* marker[c] = slot of column c in ids[]/wgts[] for the current row, or -1 */
+  marker = gk_ismalloc(nrows, -1, "marker");
+  ids    = gk_imalloc(nrows, "ids");
+  if (hasvals)
+    wgts = gk_fmalloc(nrows, "wgts");
+
+  nrowptr[0] = nnz = 0;
+  for (i=0; i<nrows; i++) {
+    nadj = 0;
+    /* out-edges; for AVG each side is pre-scaled by 0.5 so the two halves simply add up */
+    for (j=rowptr[i]; j<rowptr[i+1]; j++) {
+      ids[nadj] = rowind[j]; 
+      if (hasvals)
+        wgts[nadj] = (op == GK_CSR_SYM_AVG ? 0.5*rowval[j] : rowval[j]);
+      marker[rowind[j]] = nadj++;
+    }
+
+    /* in-edges */
+    for (j=colptr[i]; j<colptr[i+1]; j++) {
+      if (marker[colind[j]] == -1) {
+        if (op != GK_CSR_SYM_MIN) {  /* one-sided entries are dropped for MIN */
+          ids[nadj] = colind[j]; 
+          if (hasvals) 
+            wgts[nadj] = (op == GK_CSR_SYM_AVG ? 0.5*colval[j] : colval[j]);
+          nadj++;
+        }
+      }
+      else {
+        if (hasvals) {
+          switch (op) {
+            case GK_CSR_SYM_MAX:
+              wgts[marker[colind[j]]] = gk_max(colval[j], wgts[marker[colind[j]]]);
+              break;
+            case GK_CSR_SYM_MIN:
+              wgts[marker[colind[j]]] = gk_min(colval[j], wgts[marker[colind[j]]]);
+              break;
+            case GK_CSR_SYM_SUM:
+              wgts[marker[colind[j]]] += colval[j];
+              break;
+            case GK_CSR_SYM_AVG:  /* slot already holds 0.5*a_ij; add 0.5*a_ji so (i,j) == (j,i) */
+              wgts[marker[colind[j]]] += 0.5*colval[j];
+              break;
+            default:
+              errexit("Unsupported op for MakeSymmetric!\n");
+          }
+        }
+        marker[colind[j]] = -1;  /* matched: nothing left to resolve for this column */
+      }
+    }
+
+    /* go over out edges again to resolve any edges that were not found in the in
+     * edges */
+    for (j=rowptr[i]; j<rowptr[i+1]; j++) {
+      if (marker[rowind[j]] != -1) {
+        if (op == GK_CSR_SYM_MIN)
+          ids[marker[rowind[j]]] = -1;
+        marker[rowind[j]] = -1;
+      }
+    }
+
+    /* put the non '-1' entries in ids[] into i's row */
+    for (j=0; j<nadj; j++) {
+      if (ids[j] != -1) {
+        nrowind[nnz] = ids[j];
+        if (hasvals)
+          nrowval[nnz] = wgts[j];
+        nnz++;
+      }
+    }
+    nrowptr[i+1] = nnz;
+  }
+
+  gk_free((void **)&colptr, &colind, &colval, &marker, &ids, &wgts, LTERM);
+
+  return nmat;
+}
+
+
+/*************************************************************************/
+/*! This function finds the connected components in a graph stored in
+    CSR format.
+
+    \param mat is the graph structure in CSR format
+    \param cptr is the ptr structure of the CSR representation of the 
+           components. The length of this vector must be mat->nrows+1.
+    \param cind is the indices structure of the CSR representation of 
+           the components. The length of this vector must be mat->nrows.
+    \param cids is an array that stores the component # of each vertex
+           of the graph. The length of this vector must be mat->nrows.
+
+    \returns the number of components that it found.
+
+    \note The cptr, cind, and cids parameters can be NULL, in which case 
+          only the number of connected components is returned.
+*/
+/*************************************************************************/
+int gk_csr_FindConnectedComponents(gk_csr_t *mat, int32_t *cptr, int32_t *cind, 
+        int32_t *cids)
+{
+  ssize_t i, j, k, nvtxs, first, last, ntodo, ncmps;
+  ssize_t *xadj;
+  int32_t *adjncy, *pos, *todo;
+  int32_t mustfree_ccsr=0;
+
+  if (mat->nrows != mat->ncols) {
+    fprintf(stderr, "gk_csr_FindComponents: The matrix needs to be square.\n");
+    return -1;
+  }
+
+  nvtxs  = mat->nrows;
+  xadj   = mat->rowptr;
+  adjncy = mat->rowind;
+
+  /* Deal with NULL supplied cptr/cind vectors (both are replaced when cptr is NULL) */
+  if (cptr == NULL) {
+    cptr = gk_i32malloc(nvtxs+1, "gk_csr_FindComponents: cptr");
+    cind = gk_i32malloc(nvtxs, "gk_csr_FindComponents: cind");
+    mustfree_ccsr = 1;
+  }
+
+  /* The list of vertices that have not been touched yet. 
+     The valid entries are from [0..ntodo). */
+  todo = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "gk_csr_FindComponents: todo"));
+
+  /* For a vertex that has not been visited, pos[i] is the position in the
+     todo list that this vertex is stored. 
+     If a vertex has been visited, pos[i] = -1. */
+  pos = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "gk_csr_FindComponents: pos"));
+
+  /* Find the connected components */
+  ncmps = -1;
+  ntodo = nvtxs;     /* All vertices have not been visited */
+  first = last = 0;  /* Point to the first and last vertices that have been touched
+                        but not explored. 
+                        These vertices are stored in cind[first]...cind[last-1]. */
+
+  while (first < last || ntodo > 0) {
+    if (first == last) { /* Find another starting vertex */
+      cptr[++ncmps] = first;  /* Mark the end of the current CC */
+
+      /* put the first vertex in the todo list as the start of the new CC */
+      ASSERT(pos[todo[0]] != -1);
+      cind[last++] = todo[0];  
+      /* Refill slot 0 first and clear the seed's pos[] last: when the seed is the
+         final todo entry, slot 0 still names it and it must end up as visited. */
+      todo[0] = todo[--ntodo];
+      pos[todo[0]] = 0;
+      pos[cind[last-1]] = -1;  /* cind[last-1] is the seed that was just queued */
+    }
+
+    i = cind[first++];  /* Get the first visited but unexplored vertex */
+
+    for (j=xadj[i]; j<xadj[i+1]; j++) {
+      k = adjncy[j];
+      if (pos[k] != -1) {
+        cind[last++] = k;
+
+        /* Remove k from the todo list and put the last item in the todo 
+           list at the position that k was so that the todo list will be
+           consecutive. The pos[] array is updated accordingly to keep track
+           the location of the vertices in the todo[] list. */
+        todo[pos[k]] = todo[--ntodo];
+        pos[todo[pos[k]]] = pos[k];
+        pos[k] = -1;
+      }
+    }
+  }
+  cptr[++ncmps] = first;
+
+  /* see if we need to return cids */
+  if (cids != NULL) {
+    for (i=0; i<ncmps; i++) {
+      for (j=cptr[i]; j<cptr[i+1]; j++)
+        cids[cind[j]] = i;
+    }
+  }
+
+  if (mustfree_ccsr)
+    gk_free((void **)&cptr, &cind, LTERM);
+
+  gk_free((void **)&pos, &todo, LTERM);
+
+  return (int) ncmps;
+}
+
+
+/*************************************************************************/
+/*! Returns a matrix that has been reordered according to the provided
+    row/column permutation. The matrix is required to be square and the same
+    permutation is applied to both rows and columns.
+
+    \param[IN] mat is the matrix to be re-ordered.
+    \param[IN] perm is the new ordering of the rows & columns
+    \param[IN] iperm is the original ordering of the re-ordered matrix's rows & columns
+    \returns the newly created reordered matrix.
+
+    \note Either perm or iperm can be NULL but not both.
+*/
+/**************************************************************************/
+gk_csr_t *gk_csr_ReorderSymmetric(gk_csr_t *mat, int32_t *perm, int32_t *iperm)
+{
+  ssize_t j, jj;
+  ssize_t *rowptr, *nrowptr;
+  int i, u, v, nrows;
+  int freeperm=0, freeiperm=0;
+  int32_t *rowind, *nrowind;
+  float *rowval, *nrowval;
+  gk_csr_t *nmat;
+
+  if (mat->nrows != mat->ncols) {
+    fprintf(stderr, "gk_csr_ReorderSymmetric: The matrix needs to be square.\n");
+    return NULL;
+  }
+
+  if (perm == NULL && iperm == NULL)
+    return NULL;
+
+  nrows  = mat->nrows;
+  rowptr = mat->rowptr;
+  rowind = mat->rowind;
+  rowval = mat->rowval;
+
+  nmat = gk_csr_Create();
+
+  nmat->nrows = nrows;
+  nmat->ncols = nrows;
+  /* the value array is created only when the input has one (rowval may be NULL) */
+  nrowptr = nmat->rowptr = gk_zmalloc(nrows+1, "gk_csr_ReorderSymmetric: rowptr");
+  nrowind = nmat->rowind = gk_i32malloc(rowptr[nrows], "gk_csr_ReorderSymmetric: rowind");
+  nrowval = nmat->rowval = (rowval ? gk_fmalloc(rowptr[nrows], "gk_csr_ReorderSymmetric: rowval") : NULL);
+
+  /* allocate memory for the different structures present in the matrix */
+  if (mat->rlabels)
+    nmat->rlabels = gk_i32malloc(nrows, "gk_csr_ReorderSymmetric: rlabels");
+  if (mat->rmap)
+    nmat->rmap = gk_i32malloc(nrows, "gk_csr_ReorderSymmetric: rmap");
+  if (mat->rnorms)
+    nmat->rnorms = gk_fmalloc(nrows, "gk_csr_ReorderSymmetric: rnorms");
+  if (mat->rsums)
+    nmat->rsums = gk_fmalloc(nrows, "gk_csr_ReorderSymmetric: rsums");
+  if (mat->rsizes)
+    nmat->rsizes = gk_fmalloc(nrows, "gk_csr_ReorderSymmetric: rsizes");
+  if (mat->rvols)
+    nmat->rvols = gk_fmalloc(nrows, "gk_csr_ReorderSymmetric: rvols");
+  if (mat->rwgts)
+    nmat->rwgts = gk_fmalloc(nrows, "gk_csr_ReorderSymmetric: rwgts");
+
+  if (mat->clabels)
+    nmat->clabels = gk_i32malloc(nrows, "gk_csr_ReorderSymmetric: clabels");
+  if (mat->cmap)
+    nmat->cmap = gk_i32malloc(nrows, "gk_csr_ReorderSymmetric: cmap");
+  if (mat->cnorms)
+    nmat->cnorms = gk_fmalloc(nrows, "gk_csr_ReorderSymmetric: cnorms");
+  if (mat->csums)
+    nmat->csums = gk_fmalloc(nrows, "gk_csr_ReorderSymmetric: csums");
+  if (mat->csizes)
+    nmat->csizes = gk_fmalloc(nrows, "gk_csr_ReorderSymmetric: csizes");
+  if (mat->cvols)
+    nmat->cvols = gk_fmalloc(nrows, "gk_csr_ReorderSymmetric: cvols");
+  if (mat->cwgts)
+    nmat->cwgts = gk_fmalloc(nrows, "gk_csr_ReorderSymmetric: cwgts");
+
+
+  /* perm[old] = new index and iperm[new] = old index; each is the inverse of the other */
+  /* create perm/iperm if not provided */
+  if (perm == NULL) {
+    freeperm = 1;
+    perm = gk_i32malloc(nrows, "gk_csr_ReorderSymmetric: perm"); 
+    for (i=0; i<nrows; i++)
+      perm[iperm[i]] = i;
+  }
+  if (iperm == NULL) {
+    freeiperm = 1;
+    iperm = gk_i32malloc(nrows, "gk_csr_ReorderSymmetric: iperm"); 
+    for (i=0; i<nrows; i++)
+      iperm[perm[i]] = i;
+  }
+
+  /* fill-in the information of the re-ordered matrix */
+  nrowptr[0] = jj = 0;
+  for (v=0; v<nrows; v++) {
+    u = iperm[v];  /* new row v is old row u */
+    for (j=rowptr[u]; j<rowptr[u+1]; j++, jj++) {
+      nrowind[jj] = perm[rowind[j]];
+      if (nrowval) nrowval[jj] = rowval[j];
+    }
+
+    if (mat->rlabels)
+      nmat->rlabels[v] = mat->rlabels[u];
+    if (mat->rmap)
+      nmat->rmap[v] = mat->rmap[u];
+    if (mat->rnorms)
+      nmat->rnorms[v] = mat->rnorms[u];
+    if (mat->rsums)
+      nmat->rsums[v] = mat->rsums[u];
+    if (mat->rsizes)
+      nmat->rsizes[v] = mat->rsizes[u];
+    if (mat->rvols)
+      nmat->rvols[v] = mat->rvols[u];
+    if (mat->rwgts)
+      nmat->rwgts[v] = mat->rwgts[u];
+
+    if (mat->clabels)
+      nmat->clabels[v] = mat->clabels[u];
+    if (mat->cmap)
+      nmat->cmap[v] = mat->cmap[u];
+    if (mat->cnorms)
+      nmat->cnorms[v] = mat->cnorms[u];
+    if (mat->csums)
+      nmat->csums[v] = mat->csums[u];
+    if (mat->csizes)
+      nmat->csizes[v] = mat->csizes[u];
+    if (mat->cvols)
+      nmat->cvols[v] = mat->cvols[u];
+    if (mat->cwgts)
+      nmat->cwgts[v] = mat->cwgts[u];
+
+    nrowptr[v+1] = jj;
+  }
+
+
+  /* free memory (only the permutation this call created itself) */
+  if (freeperm)
+    gk_free((void **)&perm, LTERM);
+  if (freeiperm)
+    gk_free((void **)&iperm, LTERM);
+
+  return nmat;
+}
+
+
+/*************************************************************************/
+/*! This function computes a permutation of the rows/columns of a symmetric
+    matrix based on a breadth-first-traversal. It can be used for re-ordering 
+    the matrix to reduce its bandwidth for better cache locality.
+
+    \param[IN]  mat is the matrix whose ordering to be computed.
+    \param[IN]  maxdegree is the maximum number of nonzeros of the rows that
+                will participate in the BFS ordering. Rows with more nonzeros
+                will be put at the front of the ordering in decreasing degree
+                order. 
+    \param[IN]  v is the starting row of the BFS. A value of -1 indicates that
+                a randomly selected row will be used.
+    \param[OUT] perm[i] stores the ID of row i in the re-ordered matrix.
+    \param[OUT] iperm[i] stores the ID of the row that corresponds to 
+                the ith vertex in the re-ordered matrix.
+
+    \note The perm or iperm (but not both) can be NULL, at which point, 
+          the corresponding arrays are not returned. Though the program
+          works fine when both are NULL, doing that is not smart.
+          The returned arrays should be freed with gk_free().
+*/
+/*************************************************************************/
+void gk_csr_ComputeBFSOrderingSymmetric(gk_csr_t *mat, int maxdegree, int v,
+          int32_t **r_perm, int32_t **r_iperm)
+{
+  int i, k, nrows, first, last;
+  ssize_t j, *rowptr;
+  int32_t *rowind, *cot, *pos;
+
+  if (mat->nrows != mat->ncols) {
+    fprintf(stderr, "gk_csr_ComputeBFSOrderingSymmetric: The matrix needs to be square.\n");
+    return;
+  }
+  if (maxdegree < mat->nrows && v != -1) {
+    fprintf(stderr, "gk_csr_ComputeBFSOrderingSymmetric: Since maxdegree node renumbering is requested the starting row should be -1.\n");
+    return;
+  }
+  if (mat->nrows <= 0)
+    return;
+  if (v < -1 || v >= mat->nrows) { /* such a v would index pos[]/cot[] out of bounds */
+    fprintf(stderr, "gk_csr_ComputeBFSOrderingSymmetric: The starting row %d is out of range.\n", v);
+    return;
+  }
+
+  nrows  = mat->nrows;
+  rowptr = mat->rowptr;
+  rowind = mat->rowind;
+
+  /* pos[i] is the slot of row i within the todo part of cot[]; it becomes -1 once i is visited */
+  pos = gk_i32incset(nrows, 0, gk_i32malloc(nrows, "gk_csr_ComputeBFSOrderingSymmetric: pos"));
+
+  /* This array ([C]losed[O]pen[T]odo => cot) serves three purposes.
+     Positions [0...first) hold the iperm[] entries of the explored rows;
+     positions [first...last) are the OPEN list (visited rows); [last...nrows) is the todo list. */
+  cot = gk_i32incset(nrows, 0, gk_i32malloc(nrows, "gk_csr_ComputeBFSOrderingSymmetric: cot"));
+
+  first = last = 0;
+
+  /* deal with maxdegree handling */
+  if (maxdegree < nrows) {
+    last = nrows;
+    for (i=nrows-1; i>=0; i--) {
+      if (rowptr[i+1]-rowptr[i] < maxdegree) {
+        cot[--last] = i;
+        pos[i] = last;
+      }
+      else {
+        cot[first++] = i;
+        pos[i] = -1;
+      }
+    }
+    GKASSERT(first == last);
+
+    if (last > 0) { /* reorder them in degree decreasing order */
+      gk_ikv_t *cand = gk_ikvmalloc(first, "gk_csr_ComputeBFSOrderingSymmetric: cand");
+
+      for (i=0; i<first; i++) {
+        k = cot[i];
+        cand[i].key = (int)(rowptr[k+1]-rowptr[k]);
+        cand[i].val = k;
+      }
+
+      gk_ikvsortd(first, cand);
+      for (i=0; i<first; i++)
+        cot[i] = cand[i].val;
+
+      gk_free((void **)&cand, LTERM);
+    }
+
+    /* seed the BFS from a random todo row; there is none when every row is high-degree */
+    if (last < nrows)
+      v = cot[last + RandomInRange(nrows-last)];
+  }
+  else if (v == -1)
+    v = RandomInRange(nrows); /* -1 is documented as "use a randomly selected row" */
+
+  /* swap v with the front of the todo list (skipped when the todo list is empty) */
+  if (last < nrows) {
+    cot[pos[v]] = cot[last];
+    pos[cot[last]] = pos[v];
+    cot[last] = v;
+    pos[v] = last;
+  }
+
+  /* start processing the nodes */
+  while (first < nrows) {
+    if (first == last) { /* find another starting row */
+      k = cot[last];
+      GKASSERT(pos[k] != -1);
+      pos[k] = -1; /* mark node as being visited */
+      last++;
+    }
+
+    i = cot[first++];  /* the ++ advances the explored rows */
+    for (j=rowptr[i]; j<rowptr[i+1]; j++) {
+      k = rowind[j];
+      /* if a node has already been visited, its pos[] will be -1 */
+      if (pos[k] != -1) {
+        /* k sits at cot[pos[k]] in the todo part; exchange it with the head of the todo list */
+        cot[pos[k]]    = cot[last]; /* head of the todo list goes to where k was */
+        pos[cot[last]] = pos[k];    /* update pos to reflect the move */
+        cot[last++] = k;  /* put node at the end of the OPEN list */
+        pos[k]      = -1; /* mark node as being visited */
+      }
+    }
+  }
+
+  /* time to decide what to return */
+  if (r_perm != NULL) {
+    /* use the 'pos' array to build the perm array */
+    for (i=0; i<nrows; i++)
+      pos[cot[i]] = i;
+
+    *r_perm = pos;
+    pos = NULL;
+  }
+  if (r_iperm != NULL) {
+    *r_iperm = cot;
+    cot = NULL;
+  }
+
+  /* cleanup memory */
+  gk_free((void **)&pos, &cot, LTERM);
+}
+
+
+/*************************************************************************/
+/*! This function computes a permutation of the rows of a symmetric matrix
+    based on a best-first-traversal. It can be used for re-ordering the matrix
+    to reduce its bandwidth for better cache locality.
+
+    \param[IN]  mat is the matrix structure.
+    \param[IN]  v is the starting row of the best-first traversal.
+    \param[IN]  type indicates the criteria to use to measure the 'bestness'
+                of a row.
+    \param[OUT] perm[i] stores the ID of row i in the re-ordered matrix.
+    \param[OUT] iperm[i] stores the ID of the row that corresponds to 
+                the ith row in the re-ordered matrix.
+
+    \note The perm or iperm (but not both) can be NULL, at which point, 
+          the corresponding arrays are not returned. Though the program
+          works fine when both are NULL, doing that is not smart.
+          The returned arrays should be freed with gk_free().
+*/
+/*************************************************************************/
+void gk_csr_ComputeBestFOrderingSymmetric(gk_csr_t *mat, int v, int type,
+          int32_t **r_perm, int32_t **r_iperm)
+{
+  ssize_t j, *rowptr;
+  int i, u, nrows, nopen, ntodo;
+  int32_t *rowind, *perm, *degrees, *wdegrees, *sod, *level, *ot, *pos;
+  gk_i32pq_t *queue;
+
+  if (mat->nrows != mat->ncols) {
+    fprintf(stderr, "gk_csr_ComputeBestFOrderingSymmetric: The matrix needs to be square.\n");
+    return;
+  }
+  if (mat->nrows <= 0)
+    return;
+  /* v is used below as an index into ot[]/pos[] and as a queue item, so it must be a valid row */
+  if (v < 0 || v >= mat->nrows) {
+    fprintf(stderr, "gk_csr_ComputeBestFOrderingSymmetric: The starting row %d is out of range.\n", v);
+    return;
+  }
+
+  nrows  = mat->nrows;
+  rowptr = mat->rowptr;
+  rowind = mat->rowind;
+
+
+  /* the degree of the vertices in the closed list */
+  degrees = gk_i32smalloc(nrows, 0, "gk_csr_ComputeBestFOrderingSymmetric: degrees");
+
+  /* the weighted degree of the vertices in the closed list for type==3 */
+  wdegrees = gk_i32smalloc(nrows, 0, "gk_csr_ComputeBestFOrderingSymmetric: wdegrees");
+
+  /* the sum of differences for type==4 */
+  sod = gk_i32smalloc(nrows, 0, "gk_csr_ComputeBestFOrderingSymmetric: sod");
+
+  /* the encountering level of a vertex type==5 */
+  level = gk_i32smalloc(nrows, 0, "gk_csr_ComputeBestFOrderingSymmetric: level");
+
+  /* The open+todo list of vertices.
+     The vertices from [0..nopen) are the open vertices.
+     The vertices from [nopen..ntodo) are the todo vertices.
+     */
+  ot = gk_i32incset(nrows, 0, gk_i32malloc(nrows, "gk_csr_ComputeBestFOrderingSymmetric: ot"));
+
+  /* For a vertex that has not been explored, pos[i] is the position in the ot list. */
+  pos = gk_i32incset(nrows, 0, gk_i32malloc(nrows, "gk_csr_ComputeBestFOrderingSymmetric: pos"));
+
+  /* if perm[i] >= 0, then perm[i] is the order of vertex i; otherwise perm[i] == -1. */
+  perm = gk_i32smalloc(nrows, -1, "gk_csr_ComputeBestFOrderingSymmetric: perm");
+
+  /* create the queue and put the starting vertex in it */
+  queue = gk_i32pqCreate(nrows);
+  gk_i32pqInsert(queue, v, 1);
+
+  /* put v at the front of the open list */
+  pos[0] = ot[0] = v;
+  pos[v] = ot[v] = 0;
+  nopen = 1;
+  ntodo = nrows;
+
+  /* start processing the nodes */
+  for (i=0; i<nrows; i++) {
+    if (nopen == 0) { /* deal with non-connected graphs */
+      gk_i32pqInsert(queue, ot[0], 1);  
+      nopen++;
+    }
+
+    if ((v = gk_i32pqGetTop(queue)) == -1)
+      gk_errexit(SIGERR, "The priority queue got empty ahead of time [i=%d].\n", i);
+
+    if (perm[v] != -1)
+      gk_errexit(SIGERR, "The perm[%d] has already been set.\n", v);
+    perm[v] = i;
+
+    if (ot[pos[v]] != v)
+      gk_errexit(SIGERR, "Something went wrong [ot[pos[%d]]!=%d.\n", v, v);
+    if (pos[v] >= nopen)
+      gk_errexit(SIGERR, "The position of v is not in open list. pos[%d]=%d is >=%d.\n", v, pos[v], nopen);
+
+    /* remove v from the open list and re-arrange the todo part of the list:
+       ot[nopen-1] takes v's slot and the last todo row takes slot nopen-1 */
+    ot[pos[v]]       = ot[nopen-1];
+    pos[ot[nopen-1]] = pos[v];
+    if (ntodo > nopen) {
+      ot[nopen-1]      = ot[ntodo-1];
+      pos[ot[ntodo-1]] = nopen-1;
+    }
+    nopen--;
+    ntodo--;
+
+    for (j=rowptr[v]; j<rowptr[v+1]; j++) {
+      u = rowind[j];
+      if (perm[u] == -1) {
+        /* update ot list, if u is not in the open list by putting it at the end
+           of the open list. */
+        if (degrees[u] == 0) {
+          ot[pos[u]]     = ot[nopen];
+          pos[ot[nopen]] = pos[u];
+          ot[nopen]      = u;
+          pos[u]         = nopen;
+          nopen++;
+
+          level[u] = level[v]+1;
+          gk_i32pqInsert(queue, u, 0);  
+        }
+
+
+        /* update the in-closed degree */
+        degrees[u]++;
+
+        /* update the queues based on the type */
+        switch (type) {
+          case 1: /* DFS */
+            /* NOTE(review): 1000*(i+1) can exceed the int range for very large nrows */
+            gk_i32pqUpdate(queue, u, 1000*(i+1)+degrees[u]);
+            break;
+
+          case 2: /* Max in closed degree */
+            gk_i32pqUpdate(queue, u, degrees[u]);
+            break;
+
+          case 3: /* Sum of orders in closed list */
+            wdegrees[u] += i;
+            gk_i32pqUpdate(queue, u, wdegrees[u]);
+            break;
+
+          case 4: /* Sum of order-differences */
+            /* this is handled at the end of the loop */
+            ;
+            break;
+
+          case 5: /* BFS with in degree priority */
+            gk_i32pqUpdate(queue, u, -(1000*level[u] - degrees[u]));
+            break;
+
+          case 6: /* Hybrid of 1+2 */
+            gk_i32pqUpdate(queue, u, (i+1)*degrees[u]);
+            break;
+
+          default:
+            ;
+        }
+      }
+    }
+
+    if (type == 4) { /* update all the vertices in the open list */
+      for (j=0; j<nopen; j++) {
+        u = ot[j];
+        if (perm[u] != -1)
+          gk_errexit(SIGERR, "For i=%d, the open list contains a closed row: ot[%zd]=%d, perm[%d]=%d.\n", i, j, u, u, perm[u]);
+        sod[u] += degrees[u];
+        /* after the first 1000 steps the queue keys are refreshed only every 25th step */
+        if (i<1000 || i%25==0)
+          gk_i32pqUpdate(queue, u, sod[u]);
+      }
+    }
+  }
+
+
+  /* time to decide what to return */
+  if (r_iperm != NULL) {
+    /* use the 'degrees' array to build the iperm array */
+    for (i=0; i<nrows; i++)
+      degrees[perm[i]] = i;
+
+    *r_iperm = degrees;
+    degrees = NULL;
+  }
+
+  if (r_perm != NULL) {
+    *r_perm = perm;
+    perm = NULL;
+  }
+
+
+
+
+  /* cleanup memory */
+  gk_i32pqDestroy(queue);
+  gk_free((void **)&perm, &degrees, &wdegrees, &sod, &ot, &pos, &level, LTERM);
+
+}
+
diff --git a/error.c b/error.c
new file mode 100644
index 0000000..e2a18cf
--- /dev/null
+++ b/error.c
@@ -0,0 +1,214 @@
+/*!
+\file  error.c
+\brief Various error-handling functions
+
+This file contains functions dealing with error reporting and termination
+
+\author George
+\date 1/1/2007
+\version\verbatim $Id: error.c 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
+*/
+
+
+#define _GK_ERROR_C_  /* this is needed to properly declare the gk_jbuf* variables
+                         as extern in GKlib.h */
+
+#include <GKlib.h>
+
+
+/* These are the jmp_buf for the graceful exit in case of severe errors.
+   Multiple buffers are defined to allow for recursive invocation. */
+#define MAX_JBUFS 128
+__thread int gk_cur_jbufs=-1;
+__thread jmp_buf gk_jbufs[MAX_JBUFS];
+__thread jmp_buf gk_jbuf;
+
+typedef void (*gksighandler_t)(int);
+
+/* These are the holders of the old signal handlers for the trapped signals */
+static __thread gksighandler_t old_SIGMEM_handler;  /* Custom signal */
+static __thread gksighandler_t old_SIGERR_handler;  /* Custom signal */
+static __thread gksighandler_t old_SIGMEM_handlers[MAX_JBUFS];  /* Custom signal */
+static __thread gksighandler_t old_SIGERR_handlers[MAX_JBUFS];  /* Custom signal */
+
+/* The following is used to control if the gk_errexit() will actually abort or not.
+   There is always a single copy of this variable */
+static int gk_exit_on_error = 1;
+
+
+/*************************************************************************/
+/*! This function sets the gk_exit_on_error variable 
+ */
+/*************************************************************************/
+void gk_set_exit_on_error(int value)
+{
+  gk_exit_on_error = value;  /* non-zero: errexit() exits and gk_errexit() raises; zero: they only print */
+}
+
+
+
+/*************************************************************************/
+/*! This function prints an error message and exits  
+ */
+/*************************************************************************/
+void errexit(char *f_str,...)
+{
+  va_list args;
+  size_t fmtlen = strlen(f_str);
+
+  va_start(args, f_str);
+  vfprintf(stderr, f_str, args);
+  va_end(args);
+
+  /* terminate the line unless the format string already ends with '\n' */
+  if (fmtlen == 0 || f_str[fmtlen-1] != '\n')
+    fputc('\n', stderr);
+  fflush(stderr);
+
+  if (gk_exit_on_error)  /* when cleared, control returns to the caller */
+    exit(-2);
+}
+
+
+/*************************************************************************/
+/*! This function prints an error message and raises a signum signal
+ */
+/*************************************************************************/
+void gk_errexit(int signum, char *f_str,...)
+{
+  va_list args;
+
+  /* the message is always followed by a newline, whatever f_str ends with */
+  va_start(args, f_str);
+  vfprintf(stderr, f_str, args);
+  va_end(args);
+  fputc('\n', stderr);
+  fflush(stderr);
+  if (!gk_exit_on_error)
+    return;  /* report only: no signal is raised */
+  raise(signum);
+}
+
+
+/***************************************************************************/
+/*! This function pushes a new trap level: it installs gk_sigthrow for SIGMEM and
+    SIGERR and saves the previous handlers. It returns 0 if MAX_JBUFS levels are already active.
+*/
+/***************************************************************************/
+int gk_sigtrap()
+{
+  int slot = gk_cur_jbufs+1;  /* next free level of the handler stack */
+
+  if (slot >= MAX_JBUFS)
+    return 0;  /* nesting limit reached; nothing is installed */
+
+  gk_cur_jbufs = slot;
+  old_SIGMEM_handlers[slot] = signal(SIGMEM, gk_sigthrow);
+  old_SIGERR_handlers[slot] = signal(SIGERR, gk_sigthrow);
+  return 1;
+}
+  
+
+/***************************************************************************/
+/*! This function sets the handlers for the signals to their default handlers
+ */
+/***************************************************************************/
+int gk_siguntrap()
+{
+  int slot = gk_cur_jbufs;  /* level whose saved handlers get restored */
+
+  if (slot == -1)
+    return 0;  /* no gk_sigtrap() level is active */
+
+  signal(SIGMEM, old_SIGMEM_handlers[slot]);
+  signal(SIGERR, old_SIGERR_handlers[slot]);
+  gk_cur_jbufs = slot-1;
+  return 1;
+}
+  
+
+/*************************************************************************/
+/*! This function is the custom signal handler; all it does is perform a
+    longjmp to the most recently saved environment 
+ */
+/*************************************************************************/
+void gk_sigthrow(int signum)
+{
+  longjmp(gk_jbufs[gk_cur_jbufs], signum);  /* NOTE(review): gk_cur_jbufs is -1 when no gk_sigtrap() level is active -- confirm the handler cannot fire then */
+}
+  
+
+/***************************************************************************
+* This function installs gk_NonLocalExit_Handler for SIGMEM and SIGERR and saves
+* the previous handlers; it does not itself set the longjmp return point (gk_jbuf)
+****************************************************************************/
+void gk_SetSignalHandlers() 
+{
+  old_SIGMEM_handler = signal(SIGMEM,  gk_NonLocalExit_Handler);  /* previous handlers are kept so that */
+  old_SIGERR_handler = signal(SIGERR,  gk_NonLocalExit_Handler);  /* gk_UnsetSignalHandlers() can restore them */
+}
+  
+
+/***************************************************************************
+* This function sets the handlers for the signals to their default handlers
+****************************************************************************/
+void gk_UnsetSignalHandlers() 
+{
+  signal(SIGMEM,  old_SIGMEM_handler);  /* re-install whatever gk_SetSignalHandlers() saved, */
+  signal(SIGERR,  old_SIGERR_handler);  /* which is not necessarily the default handler */
+}
+  
+
+/*************************************************************************
+* This function is the handler that gk_SetSignalHandlers() installs for
+* SIGMEM and SIGERR; it performs the non-local exit by longjmp'ing to gk_jbuf.
+**************************************************************************/
+void gk_NonLocalExit_Handler(int signum)
+{
+  longjmp(gk_jbuf, signum);  /* the signal number is handed over as the longjmp value */
+}
+  
+
+/*************************************************************************/
+/*! \brief Thread-safe implementation of strerror() */
+/**************************************************************************/
+char *gk_strerror(int errnum)
+{
+#if defined(WIN32) || defined(__MINGW32__) || defined(SUNOS)
+  return strerror(errnum);  /* strerror_r() is not used on these platforms */
+#else
+  static __thread char buf[1024];  /* per-thread storage for the returned message */
+#if defined(__GLIBC__) && defined(_GNU_SOURCE)
+  char *msg = strerror_r(errnum, buf, sizeof(buf));  /* GNU variant: may return a string outside buf */
+  if (msg != buf)
+    snprintf(buf, sizeof(buf), "%s", msg);
+#else
+  if (strerror_r(errnum, buf, sizeof(buf)) != 0)  /* XSI variant: on failure buf holds no valid message */
+    snprintf(buf, sizeof(buf), "Unknown error %d", errnum);
+#endif
+  return buf;
+#endif
+}
+
+
+
+/*************************************************************************
+* This function prints a backtrace of calling functions
+**************************************************************************/
+void PrintBackTrace()
+{
+#ifdef HAVE_EXECINFO_H
+  void *array[10];
+  int i, size;
+  char **strings;
+
+  size = backtrace(array, 10);
+  strings = backtrace_symbols(array, size);  /* malloc'ed by the library; NULL on failure */
+
+  printf("Obtained %d stack frames.\n", size);
+  /* print no frame lines when the symbol strings could not be allocated */
+  for (i=0; strings != NULL && i<size; i++)
+    printf("%s\n", strings[i]);
+  free(strings);  /* free(NULL) is a no-op */
+#endif
+}
diff --git a/evaluate.c b/evaluate.c
new file mode 100644
index 0000000..ce805ce
--- /dev/null
+++ b/evaluate.c
@@ -0,0 +1,132 @@
+/*!
+  \file  evaluate.c
+  \brief Various routines to evaluate classification performance
+
+  \author George
+  \date 9/23/2008
+  \version\verbatim $Id: evaluate.c 13328 2012-12-31 14:57:40Z karypis $ \endverbatim
+*/
+
+#include <GKlib.h>
+
+/**********************************************************************
+ * This function computes the max accuracy score of a ranked list,
+ * given +1/-1 class list
+ **********************************************************************/
+float ComputeAccuracy(int n, gk_fkv_t *list)
+{
+  int pos, npos=0, nneg, hits=0, misses=0;
+  float best=0.0, cur;
+
+  /* count the positive (+1) entries; everything else is treated as negative */
+  for (pos=0; pos<n; pos++) {
+    if (list[pos].val == 1)
+      npos++;
+  }
+  nneg = n - npos;
+
+  /* sweep every cut-off: entries [0..pos] are predicted positive, the rest negative */
+  for (pos=0; pos<n; pos++) {
+    if (list[pos].val == 1)
+      hits++;     /* positive ranked above the cut */
+    else
+      misses++;   /* negative ranked above the cut */
+
+    cur = (hits + nneg - misses) * 100.0 / (npos + nneg);
+    best = (cur > best ? cur : best);
+  }
+
+  return best;
+}
+
+
+/*****************************************************************************
+ * This function computes the ROC score of a ranked list, given a +1/-1 class
+ * list.
+ ******************************************************************************/
+float ComputeROCn(int n, int maxN, gk_fkv_t *list)
+{
+  int i, P, TP, FP, TPprev, FPprev;
+  double AUC = 0.0;  /* floating point, so half-unit trapezoid areas are not truncated */
+  float prev;
+
+  FP = TP = FPprev = TPprev = 0;
+  prev = (n > 0 ? list[0].key-1 : 0.0);  /* start sentinel; list[0] is not read when n <= 0 */
+
+  for (P=0, i=0; i<n; i++)
+    P += (list[i].val == 1 ? 1 : 0);
+
+  for (i=0; i<n && FP < maxN; i++) {  /* stop once maxN negatives have been seen */
+    if (list[i].key != prev) {  /* a new key closes the trapezoid of the previous tie group */
+      AUC += 0.5*(TP+TPprev)*(FP-FPprev);
+      prev = list[i].key;
+      FPprev = FP;
+      TPprev = TP;
+    }
+    if (list[i].val == 1) 
+      TP++;
+    else
+      FP++;
+  }
+  AUC += 0.5*(TP+TPprev)*(FP-FPprev);
+
+  return (TP > 0 && FP > 0 ? (float)(AUC/((double)P*FP)) : 0.0);
+}
+
+
+/*****************************************************************************
+* This function computes the median rate of false positive for each positive
+* instance.
+******************************************************************************/
+float ComputeMedianRFP(int n, gk_fkv_t *list)
+{
+  int idx, npos=0, nneg=0, medpos, seenpos=0, seenneg=0;
+
+  for (idx=0; idx<n; idx++) {
+    if (list[idx].val != 1)
+      nneg++;   /* anything other than +1 counts as a negative */
+    else
+      npos++;
+  }
+
+  /* scan the ranking until positive number (P+1)/2, the median positive, has been seen */
+  medpos = (npos+1)/2;
+  for (idx=0; idx<n && seenpos < medpos; idx++) {
+    if (list[idx].val != 1)
+      seenneg++;
+    else
+      seenpos++;
+  }
+
+  return 1.0*seenneg/nneg;  /* negatives met during the scan, as a fraction of all negatives */
+}
+
+/*********************************************************
+ * Compute the mean
+ ********************************************************/
+float ComputeMean (int n, float *values)
+{
+  int idx = 0;
+  float total = 0.0;  /* accumulated in single precision, in index order */
+
+  while (idx < n)
+    total += values[idx++];
+
+  return 1.0 * total/ n;  /* the division itself is carried out in double */
+}
+
+/********************************************************
+ * Compute the standard deviation
+ ********************************************************/
+float ComputeStdDev(int  n, float *values)
+{
+  int idx = 0;
+  float sumsq = 0;
+  float center = ComputeMean(n, values);
+
+  /* accumulate the squared deviations from the mean, in single precision */
+  for (; idx<n; idx++)
+    sumsq += (values[idx] - center) * (values[idx] - center);
+
+  return sqrt(1.0 * sumsq/n);  /* population form: the divisor is n, not n-1 */
+}
diff --git a/fkvkselect.c b/fkvkselect.c
new file mode 100644
index 0000000..b1238ce
--- /dev/null
+++ b/fkvkselect.c
@@ -0,0 +1,142 @@
+/*!
+\file  fkvkselect.c
+\brief Selects the top-k entries (largest or smallest keys) of a gk_fkv_t array
+ 
+\date   Started 7/14/00
+\author George
+\version\verbatim $Id: fkvkselect.c 10711 2011-08-31 22:23:04Z karypis $\endverbatim
+*/
+
+
+#include <GKlib.h>
+
+/* Swap two items a and b by assignment, using stmp as the temporary. */
+#define QSSWAP(a, b, stmp) do { stmp = (a); (a) = (b); (b) = stmp; } while (0)
+
+
+/******************************************************************************/
+/*! This function puts the 'topk' largest values in the beginning of the array */
+/*******************************************************************************/
+int gk_dfkvkselect(size_t n, int topk, gk_fkv_t *cand)
+{
+  int store, scan, left, right, pvt;
+  gk_fkv_t swaptmp;
+  float pivotkey;
+
+  /* Quickselect on the keys in descending order: after each partition only the
+     side that contains slot topk is processed, so the array is not fully sorted. */
+  if (n <= topk)
+    return n; /* the array has no more elements than requested */
+
+  /* NOTE(review): n is a size_t but the indices are ints -- assumes n fits in an int */
+  left  = 0;
+  right = n-1;
+  while (left < right) {
+    pvt = left + ((right-left) >> 1);
+
+    /* choose the pivot position among left, middle and right */
+    if (cand[left].key < cand[pvt].key)
+      pvt = left;
+    if (cand[right].key > cand[pvt].key) {
+      pvt = right;
+      if (cand[left].key < cand[pvt].key)
+        pvt = left;
+    }
+
+    /* park the pivot at the right end of the range */
+    QSSWAP(cand[pvt], cand[right], swaptmp);
+    pivotkey = cand[right].key;
+
+    /* partition: keys >= pivotkey are packed at the front of [left, right) */
+    store = left-1;
+    for (scan=left; scan<right; scan++) {
+      if (cand[scan].key >= pivotkey) {
+        store++;
+        QSSWAP(cand[store], cand[scan], swaptmp);
+      }
+    }
+
+    /* move the pivot behind the packed keys */
+    store++;
+    QSSWAP(cand[store], cand[right], swaptmp);
+
+    /* narrow the range to the side that contains slot topk */
+    if (store == topk)
+      break;
+    if (store > topk)
+      right = store-1;
+    else
+      left = store+1;
+  }
+
+  /* cand[0..topk) now holds the topk largest keys, in no particular order:
+     every key placed before a pivot is >= that pivot, and the search range
+     shrinks around slot topk until a pivot lands on it or the range has
+     fewer than two elements. */
+
+  return topk;
+}
+
+
+/******************************************************************************/
+/*! This function puts the 'topk' smallest values in the beginning of the array */
+/*******************************************************************************/
+int gk_ifkvkselect(size_t n, int topk, gk_fkv_t *cand)
+{
+  int store, scan, left, right, pvt;
+  gk_fkv_t swaptmp;
+  float pivotkey;
+
+  /* Quickselect on the keys in ascending order: after each partition only the
+     side that contains slot topk is processed, so the array is not fully sorted. */
+  if (n <= topk)
+    return n; /* the array has no more elements than requested */
+
+  /* NOTE(review): n is a size_t but the indices are ints -- assumes n fits in an int */
+  left  = 0;
+  right = n-1;
+  while (left < right) {
+    pvt = left + ((right-left) >> 1);
+
+    /* choose the pivot position among left, middle and right */
+    if (cand[left].key > cand[pvt].key)
+      pvt = left;
+    if (cand[right].key < cand[pvt].key) {
+      pvt = right;
+      if (cand[left].key > cand[pvt].key)
+        pvt = left;
+    }
+
+    /* park the pivot at the right end of the range */
+    QSSWAP(cand[pvt], cand[right], swaptmp);
+    pivotkey = cand[right].key;
+
+    /* partition: keys <= pivotkey are packed at the front of [left, right) */
+    store = left-1;
+    for (scan=left; scan<right; scan++) {
+      if (cand[scan].key <= pivotkey) {
+        store++;
+        QSSWAP(cand[store], cand[scan], swaptmp);
+      }
+    }
+
+    /* move the pivot behind the packed keys */
+    store++;
+    QSSWAP(cand[store], cand[right], swaptmp);
+
+    /* narrow the range to the side that contains slot topk */
+    if (store == topk)
+      break;
+    if (store > topk)
+      right = store-1;
+    else
+      left = store+1;
+  }
+
+  /* cand[0..topk) now holds the topk smallest keys, in no particular order:
+     every key placed before a pivot is <= that pivot, and the search range
+     shrinks around slot topk until a pivot lands on it or the range has
+     fewer than two elements. */
+
+  return topk;
+}
diff --git a/fs.c b/fs.c
new file mode 100644
index 0000000..21081dd
--- /dev/null
+++ b/fs.c
@@ -0,0 +1,225 @@
+/*!
+\file  fs.c
+\brief Various file-system functions.
+
+This file contains various functions that deal with interfacing with 
+the filesystem in a portable way.
+
+\date Started 4/10/95
+\author George
+\version\verbatim $Id: fs.c 14332 2013-05-18 12:22:57Z karypis $ \endverbatim
+*/
+
+
+#include <GKlib.h>
+
+
+
+/*************************************************************************
+* This function checks if a file exists
+**************************************************************************/
+int gk_fexists(char *fname)
+{
+  struct stat info;
+
+  /* a failing stat() is reported as "does not exist" */
+  if (stat(fname, &info) != -1)
+    return S_ISREG(info.st_mode);  /* non-zero only for a regular file */
+  return 0;
+}
+
+
+/*************************************************************************
+* This function checks if a directory exists
+**************************************************************************/
+int gk_dexists(char *dirname)
+{
+  struct stat info;
+
+  /* a failing stat() is reported as "does not exist" */
+  if (stat(dirname, &info) != -1)
+    return S_ISDIR(info.st_mode);  /* non-zero only for a directory */
+  return 0;
+}
+
+
+/*************************************************************************/
+/*! \brief Returns the size of the file in bytes
+
+This function returns the size of a file as a 64 bit integer. If there 
+were any errors in stat'ing the file, -1 is returned.
+\note That due to the -1 return code, the maximum file size is limited to
+      63 bits (which I guess is okay for now).
+*/
+/**************************************************************************/
+ssize_t gk_getfsize(char *filename)
+{
+  struct stat status;
+
+  if (stat(filename, &status) == -1) 
+    return -1;  /* the file could not be stat'ed */
+
+  return (ssize_t)(status.st_size);  /* cast matches the signed return type */
+}
+
+
+/*************************************************************************/
+/*! This function gets some basic statistics about the file. 
+    \param fname is the name of the file
+    \param r_nlines is the number of lines in the file. If it is NULL,
+           this information is not returned.
+    \param r_ntokens is the number of tokens in the file. If it is NULL,
+           this information is not returned.
+    \param r_max_nlntokens is the maximum number of tokens in any line
+           in the file. If it is NULL this information is not returned.
+    \param r_nbytes is the number of bytes in the file. If it is NULL,
+           this information is not returned.
+*/
+/*************************************************************************/
+void gk_getfilestats(char *fname, size_t *r_nlines, size_t *r_ntokens, 
+        size_t *r_max_nlntokens, size_t *r_nbytes)
+{
+  size_t nlines=0, ntokens=0, max_nlntokens=0, nbytes=0, oldntokens=0, nread, i;
+  int intoken=0;  /* 1 while the scan is inside a token that has not been counted yet */
+  char buffer[4096];
+  FILE *fpin;
+
+  fpin = gk_fopen(fname, "r", "gk_GetFileStats");
+
+  /* fread() returning 0 ends the scan on end-of-file and on read errors alike */
+  while ((nread = fread(buffer, sizeof(char), sizeof(buffer), fpin)) > 0) {
+    nbytes += nread;
+
+    /* walk exactly the bytes that were read, so NUL bytes do not cut a chunk short */
+    for (i=0; i<nread; i++) {
+      if (buffer[i] == '\n') {
+        nlines++;
+        ntokens += intoken;
+        intoken = 0;
+        if (max_nlntokens < ntokens-oldntokens)
+          max_nlntokens = ntokens-oldntokens;
+        oldntokens = ntokens;
+      }
+      else if (buffer[i] == ' ' || buffer[i] == '\t') {
+        ntokens += intoken;
+        intoken = 0;
+      }
+      else {
+        intoken = 1;
+      }
+    }
+  }
+  ntokens += intoken;  /* a token that runs up to end-of-file */
+  if (max_nlntokens < ntokens-oldntokens)
+    max_nlntokens = ntokens-oldntokens;
+
+  gk_fclose(fpin);
+
+  if (r_nlines != NULL)
+    *r_nlines  = nlines;
+  if (r_ntokens != NULL)
+    *r_ntokens = ntokens;
+  if (r_max_nlntokens != NULL)
+    *r_max_nlntokens = max_nlntokens;
+  if (r_nbytes != NULL)
+    *r_nbytes  = nbytes;
+}
+
+
+/*************************************************************************
+* This function takes in a potentially full path specification of a file
+* and just returns a string containing just the basename of the file.
+* The basename is derived from the actual filename by stripping the last
+* .ext part.
+**************************************************************************/
+char *gk_getbasename(char *path)
+{
+  char *slash, *dot;
+  char *start, *name;
+
+  /* the file name starts after the last '/', or at path when there is none */
+  slash = strrchr(path, '/');
+  start = (slash == NULL ? path : slash+1);
+  name  = gk_strdup(start);
+
+  /* cut the copy at its last '.', which drops the extension */
+  dot = strrchr(name, '.');
+  if (dot != NULL)
+    *dot = '\0';
+
+  return name;  /* the copy obtained from gk_strdup() */
+}
+
+/*************************************************************************
+* This function takes in a potentially full path specification of a file
+* and just returns a string corresponding to its file extension. The
+* extension of a file is considered to be the string right after the 
+* last '.' character.
+**************************************************************************/
+char *gk_getextname(char *path)
+{
+  char *dot = strrchr(path, '.');
+
+  /* without any '.', the whole path is duplicated */
+  if (dot != NULL)
+    path = dot+1;  /* the extension begins right after the last '.' */
+  return gk_strdup(path);
+}
+
+/*************************************************************************
+* This function takes in a potentially full path specification of a file
+* and just returns a string containing just the filename.
+**************************************************************************/
+char *gk_getfilename(char *path)
+{
+  char *slash = strrchr(path, '/');
+
+  /* without any '/', the whole path is already the file name */
+  if (slash != NULL)
+    path = slash+1;  /* the name begins right after the last '/' */
+  return gk_strdup(path);
+}
+
+/*************************************************************************
+* This function takes in a potentially full path specification of a file
+* and extracts the directory path component if it exists, otherwise it
+* returns "." as the path. The memory for it is dynamically allocated.
+**************************************************************************/
+char *getpathname(char *path)
+{
+  char *slash, *dir;
+
+  slash = strrchr(path, '/');
+  if (slash == NULL)
+    return gk_strdup(".");  /* no directory component in path */
+
+  /* duplicate the path and end the copy at its last '/' */
+  dir = gk_strdup(path);
+  dir[slash-path] = '\0';
+  return dir;
+}
+
+
+
+/*************************************************************************
+* This function creates a path
+**************************************************************************/
+int gk_mkpath(char *pathname)
+{
+  char tmp[2048];
+  int len = snprintf(tmp, sizeof(tmp), "mkdir -p %s", pathname);  /* NOTE(review): pathname reaches the shell unquoted */
+  /* -1 when the command does not fit in tmp (sprintf() would have overflowed it) */
+  return (len < 0 || (size_t)len >= sizeof(tmp) ? -1 : system(tmp));
+}
+
+
+/*************************************************************************
+* This function deletes a directory tree and all of its contents
+**************************************************************************/
+int gk_rmpath(char *pathname)
+{
+  char tmp[2048];
+  int len = snprintf(tmp, sizeof(tmp), "rm -r %s", pathname);  /* NOTE(review): pathname reaches the shell unquoted */
+  /* -1 when the command does not fit in tmp (sprintf() would have overflowed it) */
+  return (len < 0 || (size_t)len >= sizeof(tmp) ? -1 : system(tmp));
+}
diff --git a/getopt.c b/getopt.c
new file mode 100644
index 0000000..2e7e042
--- /dev/null
+++ b/getopt.c
@@ -0,0 +1,855 @@
+/*************************************************************************/
+/*! \file getopt.c
+\brief Command line parsing 
+
+This file contains an implementation of GNU's Getopt facility. The purpose
+for including it here is to ensure portability across different unix- and
+windows-based systems.
+
+\warning 
+The implementation provided here uses the \c gk_ prefix for all variables
+used by the standard Getopt facility to communicate with the program.
+So, do read the documentation here.
+
+\verbatim
+   Copyright (C) 1987,88,89,90,91,92,93,94,95,96,98,99,2000,2001
+   Free Software Foundation, Inc. This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  
+\endverbatim
+*/
+/*************************************************************************/
+
+
+#include <GKlib.h>
+
+/*************************************************************************/
+/* Local function prototypes */
+/*************************************************************************/
+static void exchange (char **);  /* moves skipped non-options behind the options parsed after them */
+static char *gk_getopt_initialize (int, char **, char *);  /* first-call setup; returns the option string to use */
+static int gk_getopt_internal(int argc, char **argv, char *optstring, 
+        struct gk_option *longopts, int *longind, int long_only);  /* shared engine behind the three public entry points */
+
+
+
+/*************************************************************************/
+/*! \brief For communicating arguments to the caller.
+
+This variable is set by getopt to point at the value of the option argument, 
+for those options that accept arguments. Each parsing call (with argc >= 1) resets it to NULL first.
+*/
+/*************************************************************************/
+char *gk_optarg;
+
+
+/*************************************************************************/
+/*! \brief Index in ARGV of the next element to be scanned. 
+
+This variable is set by getopt to the index of the next element of the argv 
+array to be processed. Once getopt has found all of the option arguments, 
+you can use this variable to determine where the remaining non-option arguments 
+begin. Setting it to 0 before a call makes getopt reinitialize and start at element 1.
+*/
+/*************************************************************************/
+int gk_optind = 1; 
+
+
+/*************************************************************************/
+/*! \brief Controls error reporting for unrecognized options.  
+
+If the value of this variable is nonzero, then getopt prints an error 
+message to the standard error stream if it encounters an unknown option 
+character or an option with a missing required argument. This is the default 
+behavior. If you set this variable to zero, getopt does not print any messages,
+but it still returns the character ? to indicate an error.
+*/
+/*************************************************************************/
+int gk_opterr = 1;
+
+
+/*************************************************************************/
+/*! \brief Stores unknown option characters
+
+When getopt encounters an unknown option character or an option with a 
+missing required argument, it stores that option character in this 
+variable (0 for an ambiguous or unrecognized long option), for use in your own diagnostic messages.
+*/
+/*************************************************************************/
+int gk_optopt = '?';
+
+
+/*************************************************************************/
+/*
+Nonzero once a parsing call has run gk_getopt_initialize(); zero before that.
+*/
+/*************************************************************************/
+int gk_getopt_initialized;
+
+
+/*************************************************************************/
+/*
+The next char to be scanned in the option-element in which the last option 
+character we returned was found.  This allows us to pick up the scan where 
+we left off.
+
+If this is zero, or a null string, it means resume the scan by advancing 
+to the next ARGV-element.  
+*/
+/*************************************************************************/
+static char *nextchar;
+
+
+/*************************************************************************/
+/*
+Value of the POSIXLY_CORRECT environment variable, fetched with getenv() at initialization.
+*/
+/*************************************************************************/
+static char *posixly_correct;
+
+
+/*************************************************************************/
+/*
+Describe how to deal with options that follow non-option ARGV-elements.
+
+If the caller did not specify anything, the default is REQUIRE_ORDER if 
+the environment variable POSIXLY_CORRECT is defined, PERMUTE otherwise.
+
+REQUIRE_ORDER means don't recognize them as options; stop option processing 
+when the first non-option is seen.  This is what Unix does.  This mode of 
+operation is selected by either setting the environment variable 
+POSIXLY_CORRECT, or using `+' as the first character of the list of 
+option characters.
+
+PERMUTE is the default.  We permute the contents of ARGV as we scan, so 
+that eventually all the non-options are at the end.  This allows options
+to be given in any order, even with programs that were not written to
+expect this.
+
+RETURN_IN_ORDER is an option available to programs that were written
+to expect options and other ARGV-elements in any order and that care 
+about the ordering of the two.  We describe each non-option ARGV-element
+as if it were the argument of an option with character code 1.
+Using `-' as the first character of the list of option characters
+selects this mode of operation.
+
+The special argument `--' forces an end of option-scanning regardless
+of the value of `ordering'.  In the case of RETURN_IN_ORDER, only
+`--' can cause `getopt' to return -1 with `gk_optind' != ARGC.  
+*/
+/*************************************************************************/
+static enum
+{
+  REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER
+} ordering;  /* assigned in gk_getopt_initialize() */
+
+
+
+/*************************************************************************/
+/* 
+Describe the part of ARGV that contains non-options that have
+been skipped.  `first_nonopt' is the index in ARGV of the first of them;
+`last_nonopt' is the index after the last of them.  
+*/
+/*************************************************************************/
+static int first_nonopt;  /* start of the skipped non-option range (inclusive) */
+static int last_nonopt;   /* end of the skipped non-option range (exclusive) */
+
+
+
+
+
+/*************************************************************************/
+/*
+Handle permutation of arguments.  
+
+Exchange two adjacent subsequences of ARGV. 
+One subsequence is elements [first_nonopt,last_nonopt)
+which contains all the non-options that have been skipped so far.
+The other is elements [last_nonopt,gk_optind), which contains all
+the options processed since those non-options were skipped.
+
+`first_nonopt' and `last_nonopt' are relocated so that they describe
+the new indices of the non-options in ARGV after they are moved.  
+*/
+/*************************************************************************/
+static void exchange (char **argv)
+{
+  /* [lo,mid) holds the non-options skipped so far and [mid,hi) holds the
+     options parsed after them.  The two adjacent ranges are exchanged so
+     that the options end up in front of the non-options.  */
+  int lo = first_nonopt;
+  int mid = last_nonopt;
+  int hi = gk_optind;
+
+  /* Each pass swaps the shorter range with the far end of the longer one.
+     The shorter range thereby reaches its final position, while the
+     longer one is left as two pieces that the next pass sorts out.  */
+  while (hi > mid && mid > lo) {
+    int n_nonopts = mid - lo;
+    int n_opts = hi - mid;
+    int low_is_shorter = (n_opts > n_nonopts);
+    int count, partner, k;
+
+    if (low_is_shorter) {
+      /* Pair the non-options with the topmost `count' options.  */
+      count = n_nonopts;
+      partner = hi - count;
+    }
+    else {
+      /* Pair the options with the lowest `count' non-options.  */
+      count = n_opts;
+      partner = mid;
+    }
+
+    /* Exchange the two equal-length runs element by element.  */
+    for (k = 0; k < count; k++) {
+      char *held = argv[lo + k];
+      argv[lo + k] = argv[partner + k];
+      argv[partner + k] = held;
+    }
+
+    /* Drop the range that is now in place from further passes.  */
+    if (low_is_shorter)
+      hi -= count;
+    else
+      lo += count;
+  }
+
+  /* The non-options now sit immediately below gk_optind; slide the
+     bookkeeping indices by the number of options that moved in front.  */
+  first_nonopt += (gk_optind - last_nonopt);
+  last_nonopt = gk_optind;
+}
+
+
+
+/*************************************************************************/
+/*
+Initialize the internal data when the first call is made.  
+*/
+/*************************************************************************/
+static char *gk_getopt_initialize (int argc, char **argv, char *optstring)
+{
+  /* Nothing has been skipped yet: the non-option window is empty and
+     sits at gk_optind, where scanning will begin.  */
+  first_nonopt = last_nonopt = gk_optind;
+
+  /* No option element is partially consumed.  */
+  nextchar = NULL;
+
+  posixly_correct = getenv("POSIXLY_CORRECT");
+
+  /* A leading '-' or '+' picks the ordering explicitly and is removed
+     from the returned string; otherwise the ordering depends on whether
+     POSIXLY_CORRECT is present in the environment.  */
+  switch (optstring[0]) {
+    case '-':
+      ordering = RETURN_IN_ORDER;
+      return optstring + 1;
+
+    case '+':
+      ordering = REQUIRE_ORDER;
+      return optstring + 1;
+
+    default:
+      ordering = (posixly_correct != NULL) ? REQUIRE_ORDER : PERMUTE;
+      return optstring;
+  }
+}
+
+
+/*************************************************************************/
+/*
+   Scan elements of ARGV (whose length is ARGC) for option characters
+   given in OPTSTRING.
+
+   If an element of ARGV starts with '-', and is not exactly "-" or "--",
+   then it is an option element.  The characters of this element
+   (aside from the initial '-') are option characters.  If `getopt'
+   is called repeatedly, it returns successively each of the option characters
+   from each of the option elements.
+
+   If `getopt' finds another option character, it returns that character,
+   updating `gk_optind' and `nextchar' so that the next call to `getopt' can
+   resume the scan with the following option character or ARGV-element.
+
+   If there are no more option characters, `getopt' returns -1.
+   Then `gk_optind' is the index in ARGV of the first ARGV-element
+   that is not an option.  (The ARGV-elements have been permuted
+   so that those that are not options now come last.)
+
+   OPTSTRING is a string containing the legitimate option characters.
+   If an option character is seen that is not listed in OPTSTRING,
+   return '?' after printing an error message.  If you set `gk_opterr' to
+   zero, the error message is suppressed but we still return '?'.
+
+   If a char in OPTSTRING is followed by a colon, that means it wants an arg,
+   so the following text in the same ARGV-element, or the text of the following
+   ARGV-element, is returned in `gk_optarg'.  Two colons mean an option that
+   wants an optional arg; if there is text in the current ARGV-element,
+   it is returned in `gk_optarg', otherwise `gk_optarg' is set to zero.
+
+   If OPTSTRING starts with `-' or `+', it requests different methods of
+   handling the non-option ARGV-elements.
+   See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above.
+
+   Long-named options begin with `--' instead of `-'.
+   Their names may be abbreviated as long as the abbreviation is unique
+   or is an exact match for some defined option.  If they have an
+   argument, it follows the option name in the same ARGV-element, separated
+   from the option name by a `=', or else in the next ARGV-element.
+   When `getopt' finds a long-named option, it returns 0 if that option's
+   `flag' field is nonzero, the value of the option's `val' field
+   if the `flag' field is zero.
+
+   LONGOPTS is a vector of `struct gk_option' terminated by an
+   element containing a name which is zero.
+
+   LONGIND returns the index in LONGOPT of the long-named option found.
+   It is only valid when a long-named option has been found by the most
+   recent call.
+
+   If LONG_ONLY is nonzero, '-' as well as '--' can introduce
+   long-named options.  
+*/
+/*************************************************************************/
+static int gk_getopt_internal(int argc, char **argv, char *optstring, 
+        struct gk_option *longopts, int *longind, int long_only)
+{
+  int print_errors = gk_opterr;
+
+  if (argc < 1)
+    return -1;
+
+  gk_optarg = NULL;
+
+  if (gk_optind == 0 || !gk_getopt_initialized) {
+    if (gk_optind == 0)
+      gk_optind = 1;	/* Don't scan ARGV[0], the program name.  */
+    optstring = gk_getopt_initialize (argc, argv, optstring);
+    gk_getopt_initialized = 1;
+  }
+  else if (optstring[0] == '-' || optstring[0] == '+')
+    optstring++;  /* Later calls must skip the ordering prefix too, or it is taken as an option char.  */
+  if (optstring[0] == ':')
+    print_errors = 0;  /* Tested after the prefix skip so "+:..." and "-:..." are honoured.  */
+
+  /* Test whether ARGV[gk_optind] points to a non-option argument.
+     Either it does not have option syntax, or there is an environment flag
+     from the shell indicating it is not an option.  The latter information
+     is only used inside GNU libc; the macro below tests the syntax alone.  */
+# define NONOPTION_P (argv[gk_optind][0] != '-' || argv[gk_optind][1] == '\0')
+
+  if (nextchar == NULL || *nextchar == '\0') {
+    /* Advance to the next ARGV-element.  */
+
+    /* Give FIRST_NONOPT & LAST_NONOPT rational values if OPTIND has been
+       moved back by the user (who may also have changed the arguments).  */
+    if (last_nonopt > gk_optind)
+      last_nonopt = gk_optind;
+    if (first_nonopt > gk_optind)
+      first_nonopt = gk_optind;
+
+    if (ordering == PERMUTE) {
+      /* If we have just processed some options following some non-options,
+	 exchange them so that the options come first.  */
+
+      if (first_nonopt != last_nonopt && last_nonopt != gk_optind)
+	exchange ((char **) argv);
+      else if (last_nonopt != gk_optind)
+	first_nonopt = gk_optind;
+
+      /* Skip any additional non-options
+	 and extend the range of non-options previously skipped.  */
+
+      while (gk_optind < argc && NONOPTION_P)
+        gk_optind++;
+
+      last_nonopt = gk_optind;
+    }
+
+    /* The special ARGV-element `--' means premature end of options.
+       Skip it like a null option,
+       then exchange with previous non-options as if it were an option,
+       then skip everything else like a non-option.  */
+
+    if (gk_optind != argc && !strcmp (argv[gk_optind], "--")) {
+      gk_optind++;
+
+      if (first_nonopt != last_nonopt && last_nonopt != gk_optind)
+        exchange ((char **) argv);
+      else if (first_nonopt == last_nonopt)
+        first_nonopt = gk_optind;
+      last_nonopt = argc;
+
+      gk_optind = argc;
+    }
+
+    /* If we have done all the ARGV-elements, stop the scan
+       and back over any non-options that we skipped and permuted.  */
+
+    if (gk_optind == argc) {
+      /* Set the next-arg-index to point at the non-options
+	 that we previously skipped, so the caller will digest them.  */
+      if (first_nonopt != last_nonopt)
+	gk_optind = first_nonopt;
+      return -1;
+    }
+
+    /* If we have come to a non-option and did not permute it,
+       either stop the scan or describe it to the caller and pass it by.  */
+
+    if (NONOPTION_P) {
+      if (ordering == REQUIRE_ORDER)
+	return -1;
+      gk_optarg = argv[gk_optind++];
+      return 1;
+    }
+
+    /* We have found another option-ARGV-element.
+       Skip the initial punctuation.  */
+
+    nextchar = (argv[gk_optind] + 1 + (longopts != NULL && argv[gk_optind][1] == '-'));
+  }
+
+  /* Decode the current option-ARGV-element.  */
+
+  /* Check whether the ARGV-element is a long option.
+
+     If long_only and the ARGV-element has the form "-f", where f is
+     a valid short option, don't consider it an abbreviated form of
+     a long option that starts with f.  Otherwise there would be no
+     way to give the -f short option.
+
+     On the other hand, if there's a long option "fubar" and
+     the ARGV-element is "-fu", do consider that an abbreviation of
+     the long option, just like "--fu", and not "-f" with arg "u".
+
+     This distinction seems to be the most useful approach.  */
+
+  if (longopts != NULL && (argv[gk_optind][1] == '-' || (long_only && (argv[gk_optind][2] || !strchr(optstring, argv[gk_optind][1]))))) {
+    char *nameend;
+    struct gk_option *p;
+    struct gk_option *pfound = NULL;
+    int exact = 0;
+    int ambig = 0;
+    int indfound = -1;
+    int option_index;
+
+    for (nameend = nextchar; *nameend && *nameend != '='; nameend++)
+      /* Do nothing.  */ ;
+
+    /* Test all long options for either exact match or abbreviated matches.  */
+    for (p = longopts, option_index = 0; p->name; p++, option_index++) {
+      if (!strncmp (p->name, nextchar, nameend - nextchar)) {
+        if ((unsigned int) (nameend - nextchar) == (unsigned int) strlen (p->name)) {
+	  /* Exact match found.  */
+	  pfound = p;
+	  indfound = option_index;
+	  exact = 1;
+	  break;
+	}
+	else if (pfound == NULL) {
+          /* First nonexact match found.  */
+	  pfound = p;
+	  indfound = option_index;
+	}
+	else if (long_only || pfound->has_arg != p->has_arg || pfound->flag != p->flag || pfound->val != p->val)
+	  /* Second or later nonexact match found.  */
+	  ambig = 1;
+      }
+    }
+
+    if (ambig && !exact) {
+      if (print_errors)
+        fprintf(stderr, "%s: option `%s' is ambiguous\n", argv[0], argv[gk_optind]);
+
+      nextchar += strlen (nextchar);
+      gk_optind++;
+      gk_optopt = 0;
+      return '?';
+    }
+
+    if (pfound != NULL) {
+      option_index = indfound;
+      gk_optind++;
+      if (*nameend) {
+	/* Don't test has_arg with >, because some C compilers don't allow it to be used on enums.  */
+	if (pfound->has_arg)
+	  gk_optarg = nameend + 1;
+	else {
+	  if (print_errors) {
+	    if (argv[gk_optind - 1][1] == '-')
+	      /* --option */
+	      fprintf(stderr, "%s: option `--%s' doesn't allow an argument\n", argv[0], pfound->name);
+	    else
+	      /* +option or -option */
+	      fprintf(stderr, "%s: option `%c%s' doesn't allow an argument\n", argv[0], argv[gk_optind - 1][0], pfound->name);
+	  }
+
+	  nextchar += strlen (nextchar);
+
+	  gk_optopt = pfound->val;
+	  return '?';
+	}
+      }
+      else if (pfound->has_arg == 1) {
+	if (gk_optind < argc)
+	  gk_optarg = argv[gk_optind++];
+	else {
+	  if (print_errors)
+	    fprintf(stderr, "%s: option `%s' requires an argument\n", argv[0], argv[gk_optind - 1]);
+	  nextchar += strlen (nextchar);
+	  gk_optopt = pfound->val;
+	  return optstring[0] == ':' ? ':' : '?';
+	}
+      }
+      nextchar += strlen (nextchar);
+      if (longind != NULL)
+        *longind = option_index;
+      if (pfound->flag) {
+	*(pfound->flag) = pfound->val;
+	return 0;
+      }
+      return pfound->val;
+    }
+
+    /* Can't find it as a long option.  If this is not getopt_long_only,
+       or the option starts with '--' or is not a valid short
+        option, then it's an error. Otherwise interpret it as a short option.  */
+    if (!long_only || argv[gk_optind][1] == '-' || strchr(optstring, *nextchar) == NULL) {
+      if (print_errors) {
+	if (argv[gk_optind][1] == '-')
+	  /* --option */
+	  fprintf(stderr, "%s: unrecognized option `--%s'\n", argv[0], nextchar);
+	else
+	  /* +option or -option */
+	  fprintf(stderr, "%s: unrecognized option `%c%s'\n", argv[0], argv[gk_optind][0], nextchar);
+      }
+      nextchar = (char *) "";
+      gk_optind++;
+      gk_optopt = 0;
+      return '?';
+    }
+  }
+
+  /* Look at and handle the next short option-character.  */
+  {
+    char c = *nextchar++;
+    char *temp = strchr(optstring, c);
+
+    /* Increment `gk_optind' when we start to process its last character.  */
+    if (*nextchar == '\0')
+      ++gk_optind;
+
+    if (temp == NULL || c == ':') {
+      if (print_errors) {
+        if (posixly_correct)
+	  /* 1003.2 specifies the format of this message.  */
+	  fprintf(stderr, "%s: illegal option -- %c\n", argv[0], c);
+	else
+	  fprintf(stderr, "%s: invalid option -- %c\n", argv[0], c);
+      }
+      gk_optopt = c;
+      return '?';
+    }
+
+    /* Convenience. Treat POSIX -W foo same as long option --foo; without a long-option table `W' is an ordinary option.  */
+    if (temp[0] == 'W' && temp[1] == ';' && longopts != NULL) {
+      char *nameend;
+      struct gk_option *p;
+      struct gk_option *pfound = NULL;
+      int exact = 0;
+      int ambig = 0;
+      int indfound = 0;
+      int option_index;
+
+      /* This is an option that requires an argument.  */
+      if (*nextchar != '\0') {
+	gk_optarg = nextchar;
+	/* If we end this ARGV-element by taking the rest as an arg,
+	   we must advance to the next element now.  */
+	gk_optind++;
+      }
+      else if (gk_optind == argc) {
+	if (print_errors) {
+	  /* 1003.2 specifies the format of this message.  */
+	  fprintf(stderr, "%s: option requires an argument -- %c\n", argv[0], c);
+	}
+	gk_optopt = c;
+	if (optstring[0] == ':')
+	  c = ':';
+	else
+	  c = '?';
+	return c;
+      }
+      else
+	/* We already incremented `gk_optind' once; increment it again when taking next ARGV-elt as argument.  */
+	gk_optarg = argv[gk_optind++];
+
+      /* gk_optarg is now the argument, see if it's in the table of longopts.  */
+
+      for (nextchar = nameend = gk_optarg; *nameend && *nameend != '='; nameend++)
+	/* Do nothing.  */ ;
+
+      /* Test all long options for either exact match or abbreviated matches.  */
+      for (p = longopts, option_index = 0; p->name; p++, option_index++) {
+	if (!strncmp (p->name, nextchar, nameend - nextchar)) {
+	  if ((unsigned int) (nameend - nextchar) == strlen (p->name)) {
+	    /* Exact match found.  */
+	    pfound = p;
+	    indfound = option_index;
+	    exact = 1;
+	    break;
+	  }
+	  else if (pfound == NULL) {
+	    /* First nonexact match found.  */
+	    pfound = p;
+	    indfound = option_index;
+	  }
+	  else
+	    /* Second or later nonexact match found.  */
+	    ambig = 1;
+	}
+      }
+      if (ambig && !exact) {
+	if (print_errors)
+	  fprintf(stderr, "%s: option `-W %s' is ambiguous\n", argv[0], nextchar);
+	nextchar += strlen (nextchar);
+	/* gk_optind is already past the -W argument; advancing it again would skip an element or overrun argc.  */
+	return '?';
+      }
+      if (pfound != NULL) {
+	option_index = indfound;
+	if (*nameend) {
+	  /* Don't test has_arg with >, because some C compilers don't allow it to be used on enums.  */
+	  if (pfound->has_arg)
+	    gk_optarg = nameend + 1;
+	  else {
+	    if (print_errors)
+	      fprintf(stderr, "%s: option `-W %s' doesn't allow an argument\n", argv[0], pfound->name);
+
+	    nextchar += strlen (nextchar);
+	    return '?';
+	  }
+	}
+	else if (pfound->has_arg == 1) {
+	  if (gk_optind < argc)
+	    gk_optarg = argv[gk_optind++];
+	  else {
+	    if (print_errors)
+	      fprintf(stderr, "%s: option `%s' requires an argument\n", argv[0], argv[gk_optind - 1]);
+	    nextchar += strlen (nextchar);
+	    return optstring[0] == ':' ? ':' : '?';
+	  }
+        }
+	nextchar += strlen (nextchar);
+	if (longind != NULL)
+	  *longind = option_index;
+	if (pfound->flag) {
+	  *(pfound->flag) = pfound->val;
+	  return 0;
+	}
+	return pfound->val;
+      }
+      nextchar = NULL;
+      return 'W';	/* Let the application handle it.   */
+    }
+
+    if (temp[1] == ':') {
+      if (temp[2] == ':') {
+	/* This is an option that accepts an argument optionally.  */
+	if (*nextchar != '\0') {
+  	  gk_optarg = nextchar;
+	  gk_optind++;
+	}
+	else
+	  gk_optarg = NULL;
+	nextchar = NULL;
+      }
+      else {
+	/* This is an option that requires an argument.  */
+	if (*nextchar != '\0') {
+	  gk_optarg = nextchar;
+	  /* If we end this ARGV-element by taking the rest as an arg, we must advance to the next element now.  */
+	  gk_optind++;
+	}
+	else if (gk_optind == argc) {
+	  if (print_errors) {
+	    /* 1003.2 specifies the format of this message.  */
+	    fprintf(stderr, "%s: option requires an argument -- %c\n", argv[0], c);
+	  }
+	  gk_optopt = c;
+	  if (optstring[0] == ':')
+	    c = ':';
+	  else
+	    c = '?';
+	}
+	else
+	  /* We already incremented `gk_optind' once; increment it again when taking next ARGV-elt as argument.  */
+	  gk_optarg = argv[gk_optind++];
+	nextchar = NULL;
+      }
+    }
+    return c;
+  }
+}
+
+
+
+/*************************************************************************/
+/*! \brief Parse command-line arguments
+
+The gk_getopt() function gets the next option argument from the argument 
+list specified by the \c argv and \c argc arguments. Normally these values 
+come directly from the arguments received by main().
+
+\param argc is the number of command line arguments passed to main().
+\param argv is an array of strings storing the above command line 
+       arguments.
+\param options is a string that specifies the option characters that 
+       are valid for this program. An option character in this string 
+       can be followed by a colon (`:') to indicate that it takes a 
+       required argument. If an option character is followed by two 
+       colons (`::'), its argument is optional; this is a GNU extension.
+
+\return  
+It returns the option character for the next command line option. When no 
+more option arguments are available, it returns -1. There may still be 
+more non-option arguments; you must compare the external variable 
+#gk_optind against the \c argc parameter to check this.
+
+\return  
+If the option has an argument, gk_getopt() returns the argument by storing 
+it in the variable #gk_optarg. You don't ordinarily need to copy the 
+#gk_optarg string, since it is a pointer into the original \c argv array, 
+not into a static area that might be overwritten.
+
+\return  
+If gk_getopt() finds an option character in \c argv that was not included 
+in options, or a missing option argument, it returns `?' and sets the 
+external variable #gk_optopt to the actual option character. 
+If the first character of options is a colon (`:'), then gk_getopt() 
+returns `:' instead of `?' to indicate a missing option argument. 
+In addition, if the external variable #gk_opterr is nonzero (which is 
+the default), gk_getopt() prints an error message.  The variable #gk_optarg 
+is set by gk_getopt() to point at the value of the option argument, 
+for those options that accept arguments.
+
+
+gk_getopt() has three ways to deal with options that follow non-options 
+\c argv elements. The special argument <tt>`--'</tt> forces in all cases 
+the end of option scanning.
+  - The default is to permute the contents of \c argv while scanning it 
+    so that eventually all the non-options are at the end. This allows 
+    options to be given in any order, even with programs that were not 
+    written to expect this.
+  - If the options argument string begins with a hyphen (`-'), this is 
+    treated specially. It permits arguments that are not options to be 
+    returned as if they were associated with option character `\\1'.
+  - POSIX demands the following behavior: The first non-option stops 
+    option processing. This mode is selected by either setting the 
+    environment variable POSIXLY_CORRECT or beginning the options
+    argument string with a plus sign (`+'). 
+
+*/
+/*************************************************************************/
+int gk_getopt(int argc, char **argv, char *options)
+{
+  return gk_getopt_internal(argc, argv, options, NULL, NULL, 0);  /* short options only: longopts=NULL, longind=NULL, long_only=0 */
+}
+
+
+/*************************************************************************/
+/*! \brief Parse command-line arguments with long options
+
+This function accepts GNU-style long options as well as single-character 
+options. 
+
+\param argc is the number of command line arguments passed to main().
+\param argv is an array of strings storing the above command line 
+       arguments.
+\param options describes the short options to accept, just as it does 
+       in gk_getopt(). 
+\param long_options describes the long options to accept. See the 
+       definition of ::gk_option for more information.
+\param opt_index this is a returned variable.  For any long option, 
+       gk_getopt_long() tells you the index in the array \c long_options 
+       of the options definition, by storing it into <tt>*opt_index</tt>. 
+       You can get the name of the option with <tt>long_options[*opt_index].name</tt>. 
+       So you can distinguish among long options either by the values 
+       in their val fields or by their indices. You can also distinguish 
+       in this way among long options that set flags.
+
+
+\return
+When gk_getopt_long() encounters a short option, it does the same thing 
+that gk_getopt() would do: it returns the character code for the option, 
+and stores the options argument (if it has one) in #gk_optarg.
+
+\return
+When gk_getopt_long() encounters a long option, it takes actions based 
+on the flag and val fields of the definition of that option.
+
+\return
+If flag is a null pointer, then gk_getopt_long() returns the contents 
+of val to indicate which option it found. You should arrange distinct 
+values in the val field for options with different meanings, so you 
+can decode these values after gk_getopt_long() returns. If the long 
+option is equivalent to a short option, you can use the short option's 
+character code in val.
+
+\return
+If flag is not a null pointer, that means this option should just set 
+a flag in the program. The flag is a variable of type int that you 
+define. Put the address of the flag in the flag field. Put in the 
+val field the value you would like this option to store in the flag. 
+In this case, gk_getopt_long() returns 0.
+
+\return
+When a long option has an argument, gk_getopt_long() puts the argument 
+value in the variable #gk_optarg before returning. When the option has 
+no argument, the value in #gk_optarg is a null pointer. This is
+how you can tell whether an optional argument was supplied.
+
+\return
+When gk_getopt_long() has no more options to handle, it returns -1, 
+and leaves in the variable #gk_optind the index in argv of the next 
+remaining argument. 
+*/
+/*************************************************************************/
+int gk_getopt_long( int argc, char **argv, char *options, 
+       struct gk_option *long_options, int *opt_index)
+{
+  return gk_getopt_internal (argc, argv, options, long_options, opt_index, 0);  /* long_only=0: only `--' introduces a long option */
+}
+
+
+
+/*************************************************************************/
+/*! \brief Parse command-line arguments with only long options
+
+Like gk_getopt_long(), but '-' as well as '--' can indicate a long option.
+If an option that starts with '-' (not '--') doesn't match a long option,
+but does match a short option, it is parsed as a short option instead.  
+*/
+/*************************************************************************/
+int gk_getopt_long_only(int argc, char **argv, char *options, 
+       struct gk_option *long_options, int *opt_index)
+{
+  return gk_getopt_internal(argc, argv, options, long_options, opt_index, 1);  /* long_only=1: `-' may also introduce a long option */
+}
+
diff --git a/gk_arch.h b/gk_arch.h
new file mode 100644
index 0000000..b82fb6a
--- /dev/null
+++ b/gk_arch.h
@@ -0,0 +1,70 @@
+/*!
+\file gk_arch.h
+\brief This file contains various architecture-specific declarations
+
+\date   Started 3/27/2007
+\author George
+\version\verbatim $Id: gk_arch.h 21637 2018-01-03 22:37:24Z karypis $ \endverbatim
+*/
+
+#ifndef _GK_ARCH_H_
+#define _GK_ARCH_H_
+
+/*************************************************************************
+* Architecture-specific differences in header files
+**************************************************************************/
+#ifdef LINUX
+#if !defined(__USE_XOPEN)
+#define __USE_XOPEN
+#endif
+#if !defined(_XOPEN_SOURCE)
+#define _XOPEN_SOURCE 600
+#endif
+#if !defined(__USE_XOPEN2K)
+#define __USE_XOPEN2K
+#endif
+#endif
+
+
+#ifdef HAVE_EXECINFO_H
+#include <execinfo.h>
+#endif
+
+
+#ifdef __MSC__ 
+  #include "gk_ms_stdint.h"
+  #include "gk_ms_inttypes.h"
+  #include "gk_ms_stat.h"
+  #include "win32/adapt.h"
+#else
+#ifndef SUNOS
+  #include <stdint.h>
+#endif
+  #include <inttypes.h>
+  #include <sys/types.h>
+#ifndef __MINGW32__
+  #include <sys/resource.h>
+#endif
+  #include <sys/time.h>
+  #include <unistd.h>
+#endif
+
+
+/*************************************************************************
+* Architecture-specific modifications
+**************************************************************************/
+#ifdef WIN32
+typedef ptrdiff_t ssize_t;
+#endif
+
+
+#ifdef SUNOS
+#define PTRDIFF_MAX  INT64_MAX
+#endif
+
+/* MSC does not have INFINITY defined */
+#ifndef INFINITY
+#define INFINITY FLT_MAX
+#endif
+
+#endif
diff --git a/gk_defs.h b/gk_defs.h
new file mode 100644
index 0000000..68cb9a4
--- /dev/null
+++ b/gk_defs.h
@@ -0,0 +1,87 @@
+/*!
+\file gk_defs.h
+\brief This file contains various constants definitions
+
+\date   Started 3/27/2007
+\author George
+\version\verbatim $Id: gk_defs.h 22039 2018-05-26 16:34:48Z karypis $ \endverbatim
+*/
+
+#ifndef _GK_DEFS_H_
+#define _GK_DEFS_H_
+
+
+#define LTERM                   (void **) 0     /* List terminator for GKfree() */
+
+/* mopt_t types */
+#define GK_MOPT_MARK            1
+#define GK_MOPT_CORE            2
+#define GK_MOPT_HEAP            3
+
+#define HTABLE_EMPTY            -1
+#define HTABLE_DELETED          -2
+#define HTABLE_FIRST             1
+#define HTABLE_NEXT              2
+
+/* pdb corruption bit switches */
+#define CRP_ALTLOCS    1
+#define CRP_MISSINGCA  2
+#define CRP_MISSINGBB  4
+#define CRP_MULTICHAIN 8
+#define CRP_MULTICA    16
+#define CRP_MULTIBB    32
+
+#define MAXLINELEN 300000
+
+/* GKlib signals to standard signal mapping */
+#define SIGMEM  SIGABRT
+#define SIGERR  SIGTERM
+
+
+/* CSR-related defines */
+#define GK_CSR_ROW      1
+#define GK_CSR_COL      2
+#define GK_CSR_ROWCOL   3
+
+#define GK_CSR_MAXTF    1
+#define GK_CSR_SQRT     2
+#define GK_CSR_POW25    3
+#define GK_CSR_POW65    4
+#define GK_CSR_POW75    5
+#define GK_CSR_POW85    6
+#define GK_CSR_LOG      7
+#define GK_CSR_IDF      8
+#define GK_CSR_IDF2     9
+#define GK_CSR_MAXTF2   10
+
+#define GK_CSR_DOTP     1
+#define GK_CSR_COS      2
+#define GK_CSR_JAC      3
+#define GK_CSR_MIN      4
+#define GK_CSR_AMIN     5
+
+#define GK_CSR_FMT_AUTO         2    /* NOTE(review): same value as GK_CSR_FMT_CSR below, so the two cannot be told apart -- confirm this is intended */
+#define GK_CSR_FMT_CLUTO        1
+#define GK_CSR_FMT_CSR          2
+#define GK_CSR_FMT_METIS        3
+#define GK_CSR_FMT_BINROW       4
+#define GK_CSR_FMT_BINCOL       5
+#define GK_CSR_FMT_IJV          6
+#define GK_CSR_FMT_BIJV         7
+
+#define GK_CSR_SYM_SUM          1
+#define GK_CSR_SYM_MIN          2
+#define GK_CSR_SYM_MAX          3
+#define GK_CSR_SYM_AVG          4
+
+
+#define GK_GRAPH_FMT_METIS      1
+#define GK_GRAPH_FMT_IJV        2
+#define GK_GRAPH_FMT_HIJV       3
+
+#define GK_GRAPH_SYM_SUM        1
+#define GK_GRAPH_SYM_MIN        2
+#define GK_GRAPH_SYM_MAX        3
+#define GK_GRAPH_SYM_AVG        4
+
+#endif
diff --git a/gk_externs.h b/gk_externs.h
new file mode 100644
index 0000000..2c0fdd9
--- /dev/null
+++ b/gk_externs.h
@@ -0,0 +1,25 @@
+/*!
+\file gk_externs.h
+\brief This file contains definitions of external variables created by GKlib
+
+\date   Started 3/27/2007
+\author George
+\version\verbatim $Id: gk_externs.h 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
+*/
+
+#ifndef _GK_EXTERNS_H_
+#define _GK_EXTERNS_H_
+
+
+/*************************************************************************
+* Extern variable declarations. Hopefully, __thread (one copy per thread) makes them thread-safe.
+**************************************************************************/
+#ifndef _GK_ERROR_C_
+/* declared in error.c */
+extern __thread int gk_cur_jbufs;
+extern __thread jmp_buf gk_jbufs[];
+extern __thread jmp_buf gk_jbuf;
+
+#endif
+
+#endif
diff --git a/gk_getopt.h b/gk_getopt.h
new file mode 100644
index 0000000..597c080
--- /dev/null
+++ b/gk_getopt.h
@@ -0,0 +1,64 @@
+/*!
+\file gk_getopt.h
+\brief This file contains GNU's externs/structs/prototypes
+
+\date   Started 3/27/2007
+\author George
+\version\verbatim $Id: gk_getopt.h 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
+*/
+
+#ifndef _GK_GETOPT_H_
+#define _GK_GETOPT_H_
+
+
+/* Externals from getopt.c */
+extern char *gk_optarg;
+extern int gk_optind;
+extern int gk_opterr;
+extern int gk_optopt;
+
+
+/*! \brief The structure that stores the information about the command-line options 
+
+This structure describes a single long option name for the sake of 
+gk_getopt_long(). The argument <tt>long_options</tt> must be an array 
+of these structures, one for each long option. Terminate the array with 
+an element containing all zeros.
+*/
+struct gk_option {
+  char *name;       /*!< This field is the name of the option. */
+  int has_arg;      /*!< This field says whether the option takes an argument. 
+                         It is an integer, and there are three legitimate values: 
+                         no_argument, required_argument and optional_argument. 
+                         */
+  int *flag;        /*!< See the discussion on ::gk_option#val */
+  int val;          /*!< These fields control how to report or act on the option 
+                         when it occurs. 
+                         
+                         If flag is a null pointer, then the val is a value which 
+                         identifies this option. Often these values are chosen 
+                         to uniquely identify particular long options.
+
+                         If flag is not a null pointer, it should be the address 
+                         of an int variable which is the flag for this option. 
+                         The value in val is the value to store in the flag to 
+                         indicate that the option was seen. */
+};
+
+/* Names for the values of the `has_arg' field of `struct gk_option'.  */
+#define no_argument		0
+#define required_argument	1
+#define optional_argument	2
+
+
+/* Function prototypes */
+extern int gk_getopt(int argc, char **argv, char *shortopts);
+extern int gk_getopt_long(int argc, char **argv, char *shortopts,
+              struct gk_option *longopts, int *longind);
+extern int gk_getopt_long_only (int argc, char **argv,
+              char *shortopts, struct gk_option *longopts, int *longind);
+
+
+
+#endif
+
diff --git a/gk_macros.h b/gk_macros.h
new file mode 100644
index 0000000..c3f1b45
--- /dev/null
+++ b/gk_macros.h
@@ -0,0 +1,169 @@
+/*!
+\file gk_macros.h
+\brief This file contains various macros
+
+\date   Started 3/27/2007
+\author George
+\version\verbatim $Id: gk_macros.h 15048 2013-08-31 19:38:14Z karypis $ \endverbatim
+*/
+
+#ifndef _GK_MACROS_H_
+#define _GK_MACROS_H_
+
+/*-------------------------------------------------------------
+ * Useful macros
+ *-------------------------------------------------------------*/
+#define gk_max(a, b) ((a) >= (b) ? (a) : (b))
+#define gk_min(a, b) ((a) >= (b) ? (b) : (a))
+#define gk_max3(a, b, c) ((a) >= (b) && (a) >= (c) ? (a) : ((b) >= (a) && (b) >= (c) ? (b) : (c)))
+#define gk_SWAP(a, b, tmp) do {(tmp) = (a); (a) = (b); (b) = (tmp);} while(0) 
+#define INC_DEC(a, b, val) do {(a) += (val); (b) -= (val);} while(0)
+#define sign(a, b) (((a) >= 0) ? (b) : -(b))  /* yields b when a >= 0 and -b otherwise; arguments are parenthesized so expressions such as sign(x, p+q) negate the whole of b */
+
+#define ONEOVERRANDMAX (1.0/(RAND_MAX+1.0))
+#define RandomInRange(u) ((int) (ONEOVERRANDMAX*(u)*rand()))
+#define RandomInRange_r(s, u) ((int) (ONEOVERRANDMAX*(u)*rand_r(s)))
+
+#define gk_abs(x) ((x) >= 0 ? (x) : -(x))
+
+
+/*-------------------------------------------------------------
+ * Timing macros
+ *-------------------------------------------------------------*/
+#define gk_clearcputimer(tmr) (tmr = 0.0)               /* zero the accumulator tmr */
+#define gk_startcputimer(tmr) (tmr -= gk_CPUSeconds())  /* subtract the gk_CPUSeconds() reading taken at start ... */
+#define gk_stopcputimer(tmr)  (tmr += gk_CPUSeconds())  /* ... and add the reading at stop, so tmr grows by (stop - start) per start/stop pair */
+#define gk_getcputimer(tmr)   (tmr)                     /* the accumulated total */
+
+#define gk_clearwctimer(tmr) (tmr = 0.0)                   /* zero the accumulator tmr */
+#define gk_startwctimer(tmr) (tmr -= gk_WClockSeconds())   /* subtract the gk_WClockSeconds() reading taken at start ... */
+#define gk_stopwctimer(tmr)  (tmr += gk_WClockSeconds())   /* ... and add the reading at stop, so tmr grows by (stop - start) per start/stop pair */
+#define gk_getwctimer(tmr)   (tmr)                         /* the accumulated total */
+
+/*-------------------------------------------------------------
+ * dbglvl handling macros
+ *-------------------------------------------------------------*/
+#define IFSET(a, flag, cmd) if ((a)&(flag)) (cmd);  /* evaluates cmd when a and flag share at least one set bit; expands to a bare if-statement that already ends in ';' */
+
+
+/*-------------------------------------------------------------
+ * graceful library exit macros
+ *-------------------------------------------------------------*/
+#define GKSETJMP() (setjmp(gk_return_to_entry))  /* setjmp() on gk_return_to_entry; NOTE(review): that buffer is declared elsewhere, if at all -- confirm it still exists */
+#define gk_sigcatch() (setjmp(gk_jbufs[gk_cur_jbufs]))  /* setjmp() on the jump buffer currently selected by gk_cur_jbufs */
+ 
+
+/*-------------------------------------------------------------
+ * Debugging memory leaks
+ *-------------------------------------------------------------*/
+#ifdef DMALLOC
+#   define MALLOC_CHECK(ptr)                                          \
+    if (malloc_verify((ptr)) == DMALLOC_VERIFY_ERROR) {  \
+        printf("***MALLOC_CHECK failed on line %d of file %s: " #ptr "\n", \
+              __LINE__, __FILE__);                               \
+        abort();                                                \
+    }
+#else
+#   define MALLOC_CHECK(ptr) ;
+#endif 
+
+
+/*-------------------------------------------------------------
+ * CSR conversion macros
+ *-------------------------------------------------------------*/
+#define MAKECSR(i, n, a) /* turns the n values a[0..n-1] into running offsets a[0..n]; i is a caller-supplied loop variable */ \
+   do { \
+     for (i=1; i<n; i++) a[i] += a[i-1]; /* in-place inclusive prefix sum over a[0..n-1] */ \
+     for (i=n; i>0; i--) a[i] = a[i-1];  /* shift right by one; writes a[n], so a needs n+1 slots */ \
+     a[0] = 0; /* after the shift a[k] is the sum of the original a[0..k-1] */ \
+   } while(0) 
+
+#define SHIFTCSR(i, n, a) /* moves a[0..n-1] into a[1..n] and zeroes a[0]; i is a caller-supplied loop variable */ \
+   do { \
+     for (i=n; i>0; i--) a[i] = a[i-1]; /* copy from the top down so nothing is overwritten early; writes a[n] */ \
+     a[0] = 0; \
+   } while(0) 
+
+
+/*-------------------------------------------------------------
+ * ASSERTS that cannot be turned off!
+ *-------------------------------------------------------------*/
+#define GKASSERT(expr)                                          \
+    if (!(expr)) {                                               \
+        printf("***ASSERTION failed on line %d of file %s: " #expr "\n", \
+              __LINE__, __FILE__);                               \
+        abort();                                                \
+    }
+
+#define GKASSERTP(expr,msg)                                          \
+    if (!(expr)) {                                               \
+        printf("***ASSERTION failed on line %d of file %s: " #expr "\n", \
+              __LINE__, __FILE__);                               \
+        printf msg ; \
+        printf("\n"); \
+        abort();                                                \
+    }
+
+#define GKCUASSERT(expr)                                          \
+    if (!(expr)) {                                               \
+        printf("***ASSERTION failed on line %d of file %s: " #expr "\n", \
+              __LINE__, __FILE__);                               \
+    }
+
+#define GKWARN(expr)                                          \
+    if (!(expr)) {                                               \
+        printf("***ASSERTION failed on line %d of file %s: " #expr "\n", \
+              __LINE__, __FILE__);                               \
+    }
+
+#define GKCUASSERTP(expr,msg)                                          \
+    if (!(expr)) {                                               \
+        printf("***ASSERTION failed on line %d of file %s: " #expr "\n", \
+              __LINE__, __FILE__);                               \
+        printf msg ; \
+        printf("\n"); \
+    }
+
+#define GKWARNP(expr,msg)                                          \
+    if (!(expr)) {                                               \
+        printf("***ASSERTION failed on line %d of file %s: " #expr "\n", \
+              __LINE__, __FILE__);                               \
+        printf msg ; \
+        printf("\n"); \
+    }
+
+
+/*-------------------------------------------------------------
+ * Program Assertions
+ *-------------------------------------------------------------*/
+#ifndef NDEBUG
+#   define ASSERT(expr)                                          \
+    if (!(expr)) {                                               \
+        printf("***ASSERTION failed on line %d of file %s: " #expr "\n", \
+              __LINE__, __FILE__);                               \
+        assert(expr);                                                \
+    }
+
+#   define ASSERTP(expr,msg)                                          \
+    if (!(expr)) {                                               \
+        printf("***ASSERTION failed on line %d of file %s: " #expr "\n", \
+              __LINE__, __FILE__);                               \
+        printf msg ; \
+        printf("\n"); \
+        assert(expr);                                                \
+    }
+#else
+#   define ASSERT(expr) ;
+#   define ASSERTP(expr,msg) ;
+#endif 
+
+#ifndef NDEBUG2
+#   define ASSERT2 ASSERT
+#   define ASSERTP2 ASSERTP
+#else
+#   define ASSERT2(expr) ;
+#   define ASSERTP2(expr,msg) ;
+#endif
+
+
+#endif
diff --git a/gk_mkblas.h b/gk_mkblas.h
new file mode 100644
index 0000000..1231669
--- /dev/null
+++ b/gk_mkblas.h
@@ -0,0 +1,203 @@
+/*!
+\file  gk_mkblas.h
+\brief Templates for BLAS-like routines
+
+\date   Started 3/28/07
+\author George
+\version\verbatim $Id: gk_mkblas.h 16304 2014-02-25 14:27:19Z karypis $ \endverbatim
+*/
+
+#ifndef _GK_MKBLAS_H_
+#define _GK_MKBLAS_H_
+
+
+#define GK_MKBLAS(PRFX, TYPE, OUTTYPE) \
+/*************************************************************************/\
+/*! The macro for gk_?incset()-class of routines */\
+/*************************************************************************/\
+TYPE *PRFX ## incset(size_t n, TYPE baseval, TYPE *x)\
+{\
+  size_t i;\
+\
+  for (i=0; i<n; i++)\
+    x[i] = baseval+i;\
+\
+  return x;\
+}\
+\
+/*************************************************************************/\
+/*! Template for the gk_?max() routines: largest of the n values of x taken incx apart (0 when n is 0) */\
+/*************************************************************************/\
+TYPE PRFX ## max(size_t n, TYPE *x, size_t incx)\
+{\
+  size_t k;\
+  TYPE best;\
+\
+  if (n == 0) return (TYPE) 0;\
+\
+  for (best=x[0], k=1; k<n; k++)\
+    best = (x[k*incx] > best ? x[k*incx] : best);\
+\
+  return best;\
+}\
+\
+\
+/*************************************************************************/\
+/*! Template for the gk_?min() routines: smallest of the n values of x taken incx apart (0 when n is 0) */\
+/*************************************************************************/\
+TYPE PRFX ## min(size_t n, TYPE *x, size_t incx)\
+{\
+  size_t k;\
+  TYPE lowest;\
+\
+  if (n == 0) return (TYPE) 0;\
+\
+  for (lowest=x[0], k=1; k<n; k++)\
+    lowest = (x[k*incx] < lowest ? x[k*incx] : lowest);\
+\
+  return lowest;\
+}\
+\
+\
+/*************************************************************************/\
+/*! The macro for gk_?argmax()-class of routines */\
+/*************************************************************************/\
+size_t PRFX ## argmax(size_t n, TYPE *x, size_t incx)\
+{\
+  size_t i, j, max=0;\
+\
+  for (i=1, j=incx; i<n; i++, j+=incx)\
+    max = (x[j] > x[max] ? j : max);\
+\
+  return (size_t)(max/incx);\
+}\
+\
+\
+/*************************************************************************/\
+/*! The macro for gk_?argmin()-class of routines */\
+/*************************************************************************/\
+size_t PRFX ## argmin(size_t n, TYPE *x, size_t incx)\
+{\
+  size_t i, j, min=0;\
+\
+  for (i=1, j=incx; i<n; i++, j+=incx)\
+    min = (x[j] < x[min] ? j : min);\
+\
+  return (size_t)(min/incx);\
+}\
+\
+\
+/*************************************************************************/\
+/*! The macro for gk_?argmax_n()-class of routines: returns the element index (not the strided offset) stored in the k-th entry of the sorted candidate list */\
+/*************************************************************************/\
+size_t PRFX ## argmax_n(size_t n, TYPE *x, size_t incx, size_t k)\
+{\
+  size_t i, j, max_n;\
+  PRFX ## kv_t *cand;\
+\
+  cand = PRFX ## kvmalloc(n, "GK_ARGMAX_N: cand");\
+\
+  for (i=0, j=0; i<n; i++, j+=incx) { /* pair every value x[i*incx] with its element index i */\
+    cand[i].val = i;\
+    cand[i].key = x[j];\
+  }\
+  PRFX ## kvsortd(n, cand); /* presumably sorts by decreasing key -- TODO confirm against the kvsortd definition */\
+\
+  max_n = cand[k-1].val; /* k is 1-based; there is no check that 1 <= k <= n */\
+\
+  gk_free((void *)&cand, LTERM);\
+\
+  return max_n;\
+}\
+\
+\
+/*************************************************************************/\
+/*! The macro for gk_?sum()-class of routines */\
+/**************************************************************************/\
+OUTTYPE PRFX ## sum(size_t n, TYPE *x, size_t incx)\
+{\
+  size_t i;\
+  OUTTYPE sum = 0;\
+\
+  for (i=0; i<n; i++, x+=incx)\
+    sum += (*x);\
+\
+  return sum;\
+}\
+\
+\
+/*************************************************************************/\
+/*! The macro for gk_?scale()-class of routines: multiplies the n entries of x taken incx apart by alpha and returns x */\
+/**************************************************************************/\
+TYPE *PRFX ## scale(size_t n, TYPE alpha, TYPE *x, size_t incx)\
+{\
+  size_t i;\
+\
+  for (i=0; i<n; i++)\
+    x[i*incx] *= alpha; /* index instead of advancing x, so the pointer returned is the caller's, as in axpy */\
+\
+  return x;\
+}\
+\
+\
+/*************************************************************************/\
+/*! The macro for gk_?norm2()-class of routines */\
+/**************************************************************************/\
+OUTTYPE PRFX ## norm2(size_t n, TYPE *x, size_t incx)\
+{\
+  size_t i;\
+  OUTTYPE partial = 0;\
+\
+  for (i=0; i<n; i++, x+=incx)\
+    partial += (*x) * (*x);\
+\
+  return (partial > 0 ? (OUTTYPE)sqrt((double)partial) : (OUTTYPE)0);\
+}\
+\
+\
+/*************************************************************************/\
+/*! The macro for gk_?dot()-class of routines */\
+/**************************************************************************/\
+OUTTYPE PRFX ## dot(size_t n, TYPE *x, size_t incx, TYPE *y, size_t incy)\
+{\
+  size_t i;\
+  OUTTYPE partial = 0.0;\
+ \
+  for (i=0; i<n; i++, x+=incx, y+=incy)\
+    partial += (*x) * (*y);\
+\
+  return partial;\
+}\
+\
+\
+/*************************************************************************/\
+/*! The macro for gk_?axpy()-class of routines */\
+/**************************************************************************/\
+TYPE *PRFX ## axpy(size_t n, TYPE alpha, TYPE *x, size_t incx, TYPE *y, size_t incy)\
+{\
+  size_t i;\
+  TYPE *y_in = y;\
+\
+  for (i=0; i<n; i++, x+=incx, y+=incy)\
+    *y += alpha*(*x);\
+\
+  return y_in;\
+}\
+
+
+
+#define GK_MKBLAS_PROTO(PRFX, TYPE, OUTTYPE) \
+  TYPE    *PRFX ## incset(size_t n, TYPE baseval, TYPE *x);\
+  TYPE     PRFX ## max(size_t n, TYPE *x, size_t incx);\
+  TYPE     PRFX ## min(size_t n, TYPE *x, size_t incx);\
+  size_t   PRFX ## argmax(size_t n, TYPE *x, size_t incx);\
+  size_t   PRFX ## argmin(size_t n, TYPE *x, size_t incx);\
+  size_t   PRFX ## argmax_n(size_t n, TYPE *x, size_t incx, size_t k);\
+  OUTTYPE  PRFX ## sum(size_t n, TYPE *x, size_t incx);\
+  TYPE    *PRFX ## scale(size_t n, TYPE alpha, TYPE *x, size_t incx);\
+  OUTTYPE  PRFX ## norm2(size_t n, TYPE *x, size_t incx);\
+  OUTTYPE  PRFX ## dot(size_t n, TYPE *x, size_t incx, TYPE *y, size_t incy);\
+  TYPE    *PRFX ## axpy(size_t n, TYPE alpha, TYPE *x, size_t incx, TYPE *y, size_t incy);\
+
+
+#endif
diff --git a/gk_mkmemory.h b/gk_mkmemory.h
new file mode 100644
index 0000000..78e216e
--- /dev/null
+++ b/gk_mkmemory.h
@@ -0,0 +1,142 @@
+/*!
+\file  gk_mkmemory.h
+\brief Templates for memory allocation routines
+
+\date   Started 3/29/07
+\author George
+\version\verbatim $Id: gk_mkmemory.h 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
+*/
+
+#ifndef _GK_MKMEMORY_H_
+#define _GK_MKMEMORY_H_
+
+
+#define GK_MKALLOC(PRFX, TYPE)\
+/*************************************************************************/\
+/*! The macro for gk_?malloc()-class of routines */\
+/**************************************************************************/\
+TYPE *PRFX ## malloc(size_t n, char *msg)\
+{\
+  return (TYPE *)gk_malloc(sizeof(TYPE)*n, msg);\
+}\
+\
+\
+/*************************************************************************/\
+/*! The macro for gk_?realloc()-class of routines */\
+/**************************************************************************/\
+TYPE *PRFX ## realloc(TYPE *ptr, size_t n, char *msg)\
+{\
+  return (TYPE *)gk_realloc((void *)ptr, sizeof(TYPE)*n, msg);\
+}\
+\
+\
+/*************************************************************************/\
+/*! The macro for gk_?smalloc()-class of routines */\
+/**************************************************************************/\
+TYPE *PRFX ## smalloc(size_t n, TYPE ival, char *msg)\
+{\
+  TYPE *ptr;\
+\
+  ptr = (TYPE *)gk_malloc(sizeof(TYPE)*n, msg);\
+  if (ptr == NULL) \
+    return NULL; \
+\
+  return PRFX ## set(n, ival, ptr); \
+}\
+\
+\
+/*************************************************************************/\
+/*! The macro for gk_?set()-class of routines */\
+/*************************************************************************/\
+TYPE *PRFX ## set(size_t n, TYPE val, TYPE *x)\
+{\
+  size_t i;\
+\
+  for (i=0; i<n; i++)\
+    x[i] = val;\
+\
+  return x;\
+}\
+\
+\
+/*************************************************************************/\
+/*! The macro for gk_?copy()-class of routines: memmove()s n elements from a into b and returns b */\
+/*************************************************************************/\
+TYPE *PRFX ## copy(size_t n, TYPE *a, TYPE *b)\
+{\
+  return (TYPE *)memmove((void *)b, (void *)a, sizeof(TYPE)*n);\
+}\
+\
+\
+/*************************************************************************/\
+/*! The macro for gk_?AllocMatrix()-class of routines: allocates ndim1 rows of ndim2 entries, each set to value; returns NULL if any allocation fails */\
+/**************************************************************************/\
+TYPE **PRFX ## AllocMatrix(size_t ndim1, size_t ndim2, TYPE value, char *errmsg)\
+{\
+  gk_idx_t i, j;\
+  TYPE **matrix;\
+\
+  matrix = (TYPE **)gk_malloc(ndim1*sizeof(TYPE *), errmsg);\
+  if (matrix == NULL) return NULL;\
+\
+  for (i=0; i<ndim1; i++) { \
+    matrix[i] = PRFX ## smalloc(ndim2, value, errmsg);\
+    if (matrix[i] == NULL) { /* undo everything allocated so far */ \
+      for (j=0; j<i; j++) \
+        gk_free((void **)&matrix[j], LTERM); \
+      gk_free((void **)&matrix, LTERM); /* also release the row-pointer array, which would otherwise leak */ \
+      return NULL; \
+    } \
+  }\
+\
+  return matrix;\
+}\
+\
+\
+/*************************************************************************/\
+/*! The macro for gk_?FreeMatrix()-class of routines: frees each of the ndim1 rows and then the row-pointer array itself (ndim2 is not used) */\
+/**************************************************************************/\
+void PRFX ## FreeMatrix(TYPE ***r_matrix, size_t ndim1, size_t ndim2)\
+{\
+  gk_idx_t i;\
+  TYPE **matrix;\
+\
+  if (*r_matrix == NULL) \
+    return; \
+\
+  matrix = *r_matrix;\
+\
+  for (i=0; i<ndim1; i++) \
+    gk_free((void **)&(matrix[i]), LTERM);\
+\
+  gk_free((void **)r_matrix, LTERM); /* passes the caller's pointer variable itself, not the local copy */\
+}\
+\
+\
+/*************************************************************************/\
+/*! The macro for gk_?SetMatrix()-class of routines */\
+/**************************************************************************/\
+void PRFX ## SetMatrix(TYPE **matrix, size_t ndim1, size_t ndim2, TYPE value)\
+{\
+  gk_idx_t i, j;\
+\
+  for (i=0; i<ndim1; i++) {\
+    for (j=0; j<ndim2; j++)\
+      matrix[i][j] = value;\
+  }\
+}\
+
+
+#define GK_MKALLOC_PROTO(PRFX, TYPE)\
+  TYPE  *PRFX ## malloc(size_t n, char *msg);\
+  TYPE  *PRFX ## realloc(TYPE *ptr, size_t n, char *msg);\
+  TYPE  *PRFX ## smalloc(size_t n, TYPE ival, char *msg);\
+  TYPE  *PRFX ## set(size_t n, TYPE val, TYPE *x);\
+  TYPE  *PRFX ## copy(size_t n, TYPE *a, TYPE *b);\
+  TYPE **PRFX ## AllocMatrix(size_t ndim1, size_t ndim2, TYPE value, char *errmsg);\
+  void   PRFX ## FreeMatrix(TYPE ***r_matrix, size_t ndim1, size_t ndim2);\
+  void   PRFX ## SetMatrix(TYPE **matrix, size_t ndim1, size_t ndim2, TYPE value);\
+
+
+
+#endif
diff --git a/gk_mkpqueue.h b/gk_mkpqueue.h
new file mode 100644
index 0000000..50a5385
--- /dev/null
+++ b/gk_mkpqueue.h
@@ -0,0 +1,440 @@
+/*!
+\file  gk_mkpqueue.h
+\brief Templates for priority queues
+
+\date   Started 4/09/07
+\author George
+\version\verbatim $Id: gk_mkpqueue.h 21742 2018-01-26 16:59:15Z karypis $ \endverbatim
+*/
+
+
+#ifndef _GK_MKPQUEUE_H
+#define _GK_MKPQUEUE_H
+
+
+#define GK_MKPQUEUE(FPRFX, PQT, KVT, KT, VT, KVMALLOC, KMAX, KEY_LT)\
+/*************************************************************************/\
+/*! This function creates and initializes a priority queue for up to maxnodes items; returns NULL if the queue object cannot be allocated */\
+/**************************************************************************/\
+PQT *FPRFX ## Create(size_t maxnodes)\
+{\
+  PQT *queue; \
+\
+  queue = (PQT *)gk_malloc(sizeof(PQT), "gk_pqCreate: queue");\
+  if (queue != NULL) FPRFX ## Init(queue, maxnodes); /* Init() dereferences queue, so skip it when the allocation failed */\
+\
+  return queue;\
+}\
+\
+\
+/*************************************************************************/\
+/*! This function initializes the data structures of the priority queue */\
+/**************************************************************************/\
+void FPRFX ## Init(PQT *queue, size_t maxnodes)\
+{\
+  queue->nnodes = 0;\
+  queue->maxnodes = maxnodes;\
+\
+  queue->heap    = KVMALLOC(maxnodes, "gk_PQInit: heap");\
+  queue->locator = gk_idxsmalloc(maxnodes, -1, "gk_PQInit: locator");\
+}\
+\
+\
+/*************************************************************************/\
+/*! This function resets the priority queue */\
+/**************************************************************************/\
+void FPRFX ## Reset(PQT *queue)\
+{\
+  ssize_t i;\
+  ssize_t *locator=queue->locator;\
+  KVT *heap=queue->heap;\
+\
+  for (i=queue->nnodes-1; i>=0; i--)\
+    locator[heap[i].val] = -1;\
+  queue->nnodes = 0;\
+}\
+\
+\
+/*************************************************************************/\
+/*! This function frees the internal datastructures of the priority queue */\
+/**************************************************************************/\
+void FPRFX ## Free(PQT *queue)\
+{\
+  if (queue == NULL) return;\
+  gk_free((void **)&queue->heap, &queue->locator, LTERM);\
+  queue->maxnodes = 0;\
+}\
+\
+\
+/*************************************************************************/\
+/*! This function frees the internal datastructures of the priority queue \
+    and the queue itself */\
+/**************************************************************************/\
+void FPRFX ## Destroy(PQT *queue)\
+{\
+  if (queue == NULL) return;\
+  FPRFX ## Free(queue);\
+  gk_free((void **)&queue, LTERM);\
+}\
+\
+\
+/*************************************************************************/\
+/*! This function returns the length of the queue */\
+/**************************************************************************/\
+size_t FPRFX ## Length(PQT *queue)\
+{\
+  return queue->nnodes;\
+}\
+\
+\
+/*************************************************************************/\
+/*! This function adds an item in the priority queue */\
+/**************************************************************************/\
+int FPRFX ## Insert(PQT *queue, VT node, KT key)\
+{\
+  ssize_t i, j;\
+  ssize_t *locator=queue->locator;\
+  KVT *heap=queue->heap;\
+\
+  ASSERT2(FPRFX ## CheckHeap(queue));\
+\
+  ASSERT(locator[node] == -1);\
+\
+  i = queue->nnodes++;\
+  while (i > 0) {\
+    j = (i-1)>>1;\
+    if (KEY_LT(key, heap[j].key)) {\
+      heap[i] = heap[j];\
+      locator[heap[i].val] = i;\
+      i = j;\
+    }\
+    else\
+      break;\
+  }\
+  ASSERT(i >= 0);\
+  heap[i].key   = key;\
+  heap[i].val   = node;\
+  locator[node] = i;\
+\
+  ASSERT2(FPRFX ## CheckHeap(queue));\
+\
+  return 0;\
+}\
+\
+\
+/*************************************************************************/\
+/*! This function deletes an item from the priority queue */\
+/**************************************************************************/\
+int FPRFX ## Delete(PQT *queue, VT node)\
+{\
+  ssize_t i, j;\
+  size_t nnodes;\
+  KT newkey, oldkey;\
+  ssize_t *locator=queue->locator;\
+  KVT *heap=queue->heap;\
+\
+  ASSERT(locator[node] != -1);\
+  ASSERT(heap[locator[node]].val == node);\
+\
+  ASSERT2(FPRFX ## CheckHeap(queue));\
+\
+  i = locator[node];\
+  locator[node] = -1;\
+\
+  if (--queue->nnodes > 0 && heap[queue->nnodes].val != node) {\
+    node   = heap[queue->nnodes].val;\
+    newkey = heap[queue->nnodes].key;\
+    oldkey = heap[i].key;\
+\
+    if (KEY_LT(newkey, oldkey)) { /* Filter-up */\
+      while (i > 0) {\
+        j = (i-1)>>1;\
+        if (KEY_LT(newkey, heap[j].key)) {\
+          heap[i] = heap[j];\
+          locator[heap[i].val] = i;\
+          i = j;\
+        }\
+        else\
+          break;\
+      }\
+    }\
+    else { /* Filter down */\
+      nnodes = queue->nnodes;\
+      while ((j=(i<<1)+1) < nnodes) {\
+        if (KEY_LT(heap[j].key, newkey)) {\
+          if (j+1 < nnodes && KEY_LT(heap[j+1].key, heap[j].key))\
+            j++;\
+          heap[i] = heap[j];\
+          locator[heap[i].val] = i;\
+          i = j;\
+        }\
+        else if (j+1 < nnodes && KEY_LT(heap[j+1].key, newkey)) {\
+          j++;\
+          heap[i] = heap[j];\
+          locator[heap[i].val] = i;\
+          i = j;\
+        }\
+        else\
+          break;\
+      }\
+    }\
+\
+    heap[i].key   = newkey;\
+    heap[i].val   = node;\
+    locator[node] = i;\
+  }\
+\
+  ASSERT2(FPRFX ## CheckHeap(queue));\
+\
+  return 0;\
+}\
+\
+\
+/*************************************************************************/\
+/*! This function changes the key of an item that is already in the queue and restores the heap order */ \
+/**************************************************************************/\
+void FPRFX ## Update(PQT *queue, VT node, KT newkey)\
+{\
+  ssize_t i, j;\
+  size_t nnodes;\
+  KT oldkey;\
+  ssize_t *locator=queue->locator;\
+  KVT *heap=queue->heap;\
+\
+  ASSERT(locator[node] != -1); /* validate node before locator[node] is used as a heap index */\
+  ASSERT(heap[locator[node]].val == node);\
+\
+  oldkey = heap[locator[node]].key;\
+  if (!KEY_LT(newkey, oldkey) && !KEY_LT(oldkey, newkey)) return; /* key unchanged: nothing to move */\
+  ASSERT2(FPRFX ## CheckHeap(queue));\
+\
+  i = locator[node];\
+\
+  if (KEY_LT(newkey, oldkey)) { /* Filter-up */\
+    while (i > 0) {\
+      j = (i-1)>>1; /* parent slot */\
+      if (KEY_LT(newkey, heap[j].key)) {\
+        heap[i] = heap[j];\
+        locator[heap[i].val] = i;\
+        i = j;\
+      }\
+      else\
+        break;\
+    }\
+  }\
+  else { /* Filter down */\
+    nnodes = queue->nnodes;\
+    while ((j=(i<<1)+1) < nnodes) { /* j is the left child */\
+      if (KEY_LT(heap[j].key, newkey)) {\
+        if (j+1 < nnodes && KEY_LT(heap[j+1].key, heap[j].key))\
+          j++; /* the right child orders first, so promote it instead */\
+        heap[i] = heap[j];\
+        locator[heap[i].val] = i;\
+        i = j;\
+      }\
+      else if (j+1 < nnodes && KEY_LT(heap[j+1].key, newkey)) {\
+        j++;\
+        heap[i] = heap[j];\
+        locator[heap[i].val] = i;\
+        i = j;\
+      }\
+      else\
+        break;\
+    }\
+  }\
+\
+  heap[i].key   = newkey; /* drop the item into the slot that was opened up */\
+  heap[i].val   = node;\
+  locator[node] = i;\
+\
+  ASSERT2(FPRFX ## CheckHeap(queue));\
+\
+  return;\
+}\
+\
+\
+/*************************************************************************/\
+/*! This function returns the item at the top of the queue and removes\
+    it from the priority queue */\
+/**************************************************************************/\
+VT FPRFX ## GetTop(PQT *queue)\
+{\
+  KVT *heap        = queue->heap;\
+  ssize_t *locator = queue->locator;\
+  ssize_t hole, child;\
+  VT top, lastval;\
+  KT lastkey;\
+\
+  ASSERT2(FPRFX ## CheckHeap(queue));\
+\
+  /* Nothing to hand out from an empty queue. */\
+  if (queue->nnodes == 0)\
+    return -1;\
+\
+  /* Detach the root and mark its value as no longer queued. */\
+  queue->nnodes--;\
+  top = heap[0].val;\
+  locator[top] = -1;\
+\
+  if ((hole = queue->nnodes) > 0) {\
+    /* Re-seat the former last entry: start with a hole at the root and\
+       pull up the child that orders first until the entry fits. */\
+    lastkey = heap[hole].key;\
+    lastval = heap[hole].val;\
+    hole = 0;\
+    while ((child=2*hole+1) < queue->nnodes) {\
+      if (KEY_LT(heap[child].key, lastkey)) {\
+        /* left child qualifies; prefer the right one if it orders first */\
+        if (child+1 < queue->nnodes && KEY_LT(heap[child+1].key, heap[child].key))\
+          child++;\
+      }\
+      else if (child+1 < queue->nnodes && KEY_LT(heap[child+1].key, lastkey))\
+        child++;  /* only the right child qualifies */\
+      else\
+        break;    /* neither child orders before the entry */\
+\
+      heap[hole] = heap[child];\
+      locator[heap[hole].val] = hole;\
+      hole = child;\
+    }\
+\
+    /* hole is the slot where the displaced entry belongs */\
+    heap[hole].key   = lastkey;\
+    heap[hole].val   = lastval;\
+    locator[lastval] = hole;\
+  }\
+\
+  ASSERT2(FPRFX ## CheckHeap(queue));\
+\
+  return top;\
+}\
+\
+\
+/*************************************************************************/\
+/*! Returns the value at the top of the queue without removing it. */\
+/**************************************************************************/\
+VT FPRFX ## SeeTopVal(PQT *queue)\
+{\
+  if (queue->nnodes == 0) return -1;  /* empty queue */\
+  return queue->heap[0].val;\
+}\
+\
+\
+/*************************************************************************/\
+/*! Returns the key of the top item without removing it. */\
+/**************************************************************************/\
+KT FPRFX ## SeeTopKey(PQT *queue)\
+{\
+  if (queue->nnodes == 0) return KMAX;  /* empty queue */\
+  return queue->heap[0].key;\
+}\
+\
+\
+/*************************************************************************/\
+/*! This function returns the key of a specific item */\
+/**************************************************************************/\
+KT FPRFX ## SeeKey(PQT *queue, VT node)\
+{\
+  ssize_t *locator = queue->locator;\
+  KVT *heap = queue->heap;\
+\
+  /* locator[node] == -1 means node is not queued; using it as an index\
+     would read heap[-1], so trap that in assertion-enabled builds. */\
+  ASSERT(locator[node] != -1);\
+  return heap[locator[node]].key;\
+}\
+\
+\
+/*************************************************************************/\
+/*! This function returns the first item in a breadth-first traversal of\
+    the heap whose key is less than maxwgt. This function is here due to\
+    hMETIS and is not general!*/\
+/**************************************************************************/\
+/*\
+VT FPRFX ## SeeConstraintTop(PQT *queue, KT maxwgt, KT *wgts)\
+{\
+  ssize_t i;\
+\
+  if (queue->nnodes == 0)\
+    return -1;\
+\
+  if (maxwgt <= 1000)\
+    return FPRFX ## SeeTopVal(queue);\
+\
+  for (i=0; i<queue->nnodes; i++) {\
+    if (queue->heap[i].key > 0) {\
+      if (wgts[queue->heap[i].val] <= maxwgt)\
+        return queue->heap[i].val;\
+    }\
+    else {\
+      if (queue->heap[i/2].key <= 0)\
+        break;\
+    }\
+  }\
+\
+  return queue->heap[0].val;\
+\
+}\
+*/\
+\
+\
+/*************************************************************************/\
+/*! This function checks the consistency of the heap */\
+/**************************************************************************/\
+int FPRFX ## CheckHeap(PQT *queue)\
+{\
+  KVT *heap        = queue->heap;\
+  ssize_t *locator = queue->locator;\
+  size_t nnodes    = queue->nnodes;\
+  ssize_t i, j;\
+\
+  /* An empty queue is accepted without looking at the locator. */\
+  if (nnodes == 0)\
+    return 1;\
+\
+  /* Each heap slot must be mirrored by its locator entry and must not\
+     order before its parent. */\
+  ASSERT(locator[heap[0].val] == 0);\
+  for (i=1; i<nnodes; i++) {\
+    ASSERT(locator[heap[i].val] == i);\
+    ASSERT(!KEY_LT(heap[i].key, heap[(i-1)/2].key));\
+  }\
+\
+  /* No entry may order before the root. */\
+  for (i=1; i<nnodes; i++)\
+    ASSERT(!KEY_LT(heap[i].key, heap[0].key));\
+\
+  /* The locator must hold exactly nnodes live (non -1) entries. */\
+  for (j=0, i=0; i<queue->maxnodes; i++)\
+    j += (locator[i] != -1);\
+  ASSERTP(j == nnodes, ("%jd %jd\n", (intmax_t)j, (intmax_t)nnodes));\
+\
+  return 1;\
+}\
+
+/* Declares the functions that a GK_MKPQUEUE instantiation with the same FPRFX is expected to define. */
+#define GK_MKPQUEUE_PROTO(FPRFX, PQT, KT, VT)\
+  PQT *  FPRFX ## Create(size_t maxnodes);\
+  void   FPRFX ## Init(PQT *queue, size_t maxnodes);\
+  void   FPRFX ## Reset(PQT *queue);\
+  void   FPRFX ## Free(PQT *queue);\
+  void   FPRFX ## Destroy(PQT *queue);\
+  size_t FPRFX ## Length(PQT *queue);\
+  int    FPRFX ## Insert(PQT *queue, VT node, KT key);\
+  int    FPRFX ## Delete(PQT *queue, VT node);\
+  void   FPRFX ## Update(PQT *queue, VT node, KT newkey);\
+  VT     FPRFX ## GetTop(PQT *queue);\
+  VT     FPRFX ## SeeTopVal(PQT *queue);\
+  KT     FPRFX ## SeeTopKey(PQT *queue);\
+  KT     FPRFX ## SeeKey(PQT *queue, VT node);\
+  VT     FPRFX ## SeeConstraintTop(PQT *queue, KT maxwgt, KT *wgts); /* NOTE(review): the definition in this header is commented out, so this declaration has no body here */\
+  int    FPRFX ## CheckHeap(PQT *queue);\
+
+
+/* This is how these macros are used
+GK_MKPQUEUE(gk_dkvPQ, gk_dkvPQ_t, double, gk_idx_t, gk_dkvmalloc, DBL_MAX)
+GK_MKPQUEUE_PROTO(gk_dkvPQ, gk_dkvPQ_t, double, gk_idx_t)
+*/
+
+
+#endif
diff --git a/gk_mkpqueue2.h b/gk_mkpqueue2.h
new file mode 100644
index 0000000..10e8ee4
--- /dev/null
+++ b/gk_mkpqueue2.h
@@ -0,0 +1,215 @@
+/*!
+\file  gk_mkpqueue2.h
+\brief Templates for priority queues that do not utilize locators and as such
+       they can use different types of values.
+
+\date   Started 4/09/07
+\author George
+\version\verbatim $Id: gk_mkpqueue2.h 13005 2012-10-23 22:34:36Z karypis $ \endverbatim
+*/
+
+
+#ifndef _GK_MKPQUEUE2_H
+#define _GK_MKPQUEUE2_H
+
+
+#define GK_MKPQUEUE2(FPRFX, PQT, KT, VT, KMALLOC, VMALLOC, KMAX, KEY_LT)\
+/*************************************************************************/\
+/*! This function creates and initializes a priority queue */\
+/**************************************************************************/\
+PQT *FPRFX ## Create2(ssize_t maxnodes)\
+{\
+  PQT *queue = (PQT *)gk_malloc(sizeof(PQT), "gk_pqCreate2: queue");\
+\
+  if (queue == NULL)\
+    return NULL;\
+  /* Zero the header, then attach the key and value arrays. */\
+  memset(queue, 0, sizeof(PQT));\
+  queue->nnodes   = 0;\
+  queue->maxnodes = maxnodes;\
+  queue->keys     = KMALLOC(maxnodes, "gk_pqCreate2: keys");\
+  queue->vals     = VMALLOC(maxnodes, "gk_pqCreate2: vals");\
+  /* NOTE(review): queue is returned after gk_free(&queue); presumably gk_free NULLs it - confirm. */\
+  if (queue->keys == NULL || queue->vals == NULL)\
+    gk_free((void **)&queue->keys, &queue->vals, &queue, LTERM);\
+  return queue;\
+}\
+\
+\
+/*************************************************************************/\
+/*! Empties the priority queue by setting nnodes to 0; the keys/vals arrays are left as they are */\
+/**************************************************************************/\
+void FPRFX ## Reset2(PQT *queue)\
+{\
+  queue->nnodes = 0;\
+}\
+\
+\
+/*************************************************************************/\
+/*! Passes the key array, the value array and the queue object itself to gk_free(), then sets *r_queue to NULL. A NULL *r_queue is a no-op. */\
+/**************************************************************************/\
+void FPRFX ## Destroy2(PQT **r_queue)\
+{\
+  PQT *queue = *r_queue; \
+  if (queue == NULL) return;  /* nothing to free */\
+  gk_free((void **)&queue->keys, &queue->vals, &queue, LTERM);\
+  *r_queue = NULL;\
+}\
+\
+\
+/*************************************************************************/\
+/*! Returns nnodes, the number of items currently stored in the queue */\
+/**************************************************************************/\
+size_t FPRFX ## Length2(PQT *queue)\
+{\
+  return queue->nnodes;\
+}\
+\
+\
+/*************************************************************************/\
+/*! This function adds an item in the priority queue. */\
+/**************************************************************************/\
+int FPRFX ## Insert2(PQT *queue, VT val, KT key)\
+{\
+  ssize_t i, j;\
+  KT *keys=queue->keys;\
+  VT *vals=queue->vals;\
+\
+  /* Checked once on entry; nothing changes before the capacity test. */\
+  ASSERT2(FPRFX ## CheckHeap2(queue));\
+\
+  /* A full queue rejects the item (returns 0) and is left untouched. */\
+  if (queue->nnodes == queue->maxnodes) \
+    return 0;\
+\
+  /* Open a hole at the end and move it up while key orders before the\
+     key of the parent. */\
+  i = queue->nnodes++;\
+  while (i > 0) {\
+    j = (i-1)>>1;\
+    if (!KEY_LT(key, keys[j]))\
+      break;\
+    keys[i] = keys[j];\
+    vals[i] = vals[j];\
+    i = j;\
+  }\
+  ASSERT(i >= 0);\
+  keys[i] = key;\
+  vals[i] = val;\
+\
+  ASSERT2(FPRFX ## CheckHeap2(queue));\
+\
+  return 1;\
+}\
+\
+\
+/*************************************************************************/\
+/*! This function returns the item at the top of the queue and removes\
+    it from the priority queue */\
+/**************************************************************************/\
+int FPRFX ## GetTop2(PQT *queue, VT *r_val)\
+{\
+  KT *keys = queue->keys;\
+  VT *vals = queue->vals;\
+  ssize_t hole, child;\
+  KT lastkey;\
+  VT lastval;\
+\
+  ASSERT2(FPRFX ## CheckHeap2(queue));\
+\
+  /* An empty queue yields nothing; *r_val is left unwritten. */\
+  if (queue->nnodes == 0)\
+    return 0;\
+\
+  /* Hand out the root value and shrink the heap by one. */\
+  queue->nnodes--;\
+  *r_val = vals[0];\
+\
+  if ((hole = queue->nnodes) > 0) {\
+    /* Re-seat the former last entry: start with a hole at the root and\
+       pull up the child that orders first until the entry fits. */\
+    lastkey = keys[hole];\
+    lastval = vals[hole];\
+    hole = 0;\
+    while ((child=2*hole+1) < queue->nnodes) {\
+      if (KEY_LT(keys[child], lastkey)) {\
+        if (child+1 < queue->nnodes && KEY_LT(keys[child+1], keys[child]))\
+          child++;\
+      }\
+      else if (child+1 < queue->nnodes && KEY_LT(keys[child+1], lastkey))\
+        child++;  /* only the right child qualifies */\
+      else\
+        break;    /* neither child orders before the entry */\
+\
+      keys[hole] = keys[child];\
+      vals[hole] = vals[child];\
+      hole = child;\
+    }\
+    keys[hole] = lastkey;\
+    vals[hole] = lastval;\
+  }\
+\
+  ASSERT2(FPRFX ## CheckHeap2(queue));\
+  return 1;\
+}\
+\
+\
+/*************************************************************************/\
+/*! This function returns the item at the top of the queue. The item is not\
+    deleted from the queue. */\
+/**************************************************************************/\
+int FPRFX ## SeeTopVal2(PQT *queue, VT *r_val)\
+{\
+  int found = (queue->nnodes != 0);\
+\
+  /* *r_val is written only when the queue has a top item. */\
+  if (found)\
+    *r_val = queue->vals[0];\
+  return found;\
+}\
+\
+\
+/*************************************************************************/\
+/*! Returns the key of the top item without removing it. */\
+/**************************************************************************/\
+KT FPRFX ## SeeTopKey2(PQT *queue)\
+{\
+  if (queue->nnodes == 0) return KMAX;  /* empty queue */\
+  return queue->keys[0];\
+}\
+\
+\
+/*************************************************************************/\
+/*! This function checks the consistency of the heap */\
+/**************************************************************************/\
+int FPRFX ## CheckHeap2(PQT *queue)\
+{\
+  KT *keys=queue->keys;\
+  ssize_t i;\
+\
+  if (queue->nnodes != 0) {\
+    /* no key may order before the key of its parent ... */\
+    for (i=1; i<queue->nnodes; i++) {\
+      ASSERT(!KEY_LT(keys[i], keys[(i-1)/2]));\
+    }\
+    /* ... nor before the key at the root */\
+    for (i=1; i<queue->nnodes; i++)\
+      ASSERT(!KEY_LT(keys[i], keys[0]));\
+  }\
+  return 1;\
+}\
+
+/* Declares the functions defined by a GK_MKPQUEUE2 instantiation with the same FPRFX. */
+#define GK_MKPQUEUE2_PROTO(FPRFX, PQT, KT, VT)\
+  PQT *  FPRFX ## Create2(ssize_t maxnodes);\
+  void   FPRFX ## Reset2(PQT *queue);\
+  void   FPRFX ## Destroy2(PQT **r_queue);\
+  size_t FPRFX ## Length2(PQT *queue);\
+  int    FPRFX ## Insert2(PQT *queue, VT val, KT key);\
+  int    FPRFX ## GetTop2(PQT *queue, VT *r_val);\
+  int    FPRFX ## SeeTopVal2(PQT *queue, VT *r_val);\
+  KT     FPRFX ## SeeTopKey2(PQT *queue);\
+  int    FPRFX ## CheckHeap2(PQT *queue);\
+
+
+#endif
diff --git a/gk_mkrandom.h b/gk_mkrandom.h
new file mode 100644
index 0000000..68d54fa
--- /dev/null
+++ b/gk_mkrandom.h
@@ -0,0 +1,123 @@
+/*!
+\file  
+\brief Templates for portable random number generation
+
+\date   Started 5/17/07
+\author George
+\version\verbatim $Id: gk_mkrandom.h 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
+*/
+
+
+#ifndef _GK_MKRANDOM_H
+#define _GK_MKRANDOM_H
+
+/*************************************************************************/\
+/*! The generator for the rand() related routines.  \
+   \params RNGT  the datatype that defines the range of values over which\
+                 random numbers will be generated\
+   \params VALT  the datatype that defines the contents of the array to \
+                 be permuted by randArrayPermute() \
+   \params FPRFX the function prefix \
+*/\
+/**************************************************************************/\
+#define GK_MKRANDOM(FPRFX, RNGT, VALT)\
+/*************************************************************************/\
+/*! Seeds the generator: hands seed, cast to uint64_t, to gk_randinit() */ \
+/**************************************************************************/\
+void FPRFX ## srand(RNGT seed) \
+{\
+  gk_randinit((uint64_t) seed);\
+}\
+\
+\
+/*************************************************************************/\
+/*! Returns a random number */ \
+/**************************************************************************/\
+RNGT FPRFX ## rand() \
+{\
+  /* sizeof(RNGT) selects between the 32-bit and the 64-bit source. */\
+  return (sizeof(RNGT) <= sizeof(int32_t)\
+            ? (RNGT)gk_randint32()\
+            : (RNGT)gk_randint64());\
+}\
+\
+\
+/*************************************************************************/\
+/*! Returns rand() % max, i.e. a number in [0, max) for positive max and a non-negative rand(); max must not be 0 (it is the divisor) */ \
+/**************************************************************************/\
+RNGT FPRFX ## randInRange(RNGT max) \
+{\
+  return (RNGT)((FPRFX ## rand())%max); \
+}\
+\
+\
+/*************************************************************************/\
+/*! Randomly permutes the elements of an array p[]. \
+    flag == 1, p[i] = i prior to permutation, \
+    flag == 0, p[] is not initialized. */\
+/**************************************************************************/\
+void FPRFX ## randArrayPermute(RNGT n, VALT *p, RNGT nshuffles, int flag)\
+{\
+  RNGT k, src, dst;\
+  VALT held;\
+\
+  /* flag == 1: start from the identity permutation */\
+  if (flag == 1)\
+    for (k=0; k<n; k++)\
+      p[k] = (VALT)k;\
+\
+  if (n < 10) {\
+    /* Short arrays: n swaps of two drawn positions; nshuffles is unused. */\
+    for (k=0; k<n; k++) {\
+      dst = FPRFX ## randInRange(n);\
+      src = FPRFX ## randInRange(n);\
+      gk_SWAP(p[dst], p[src], held);\
+    }\
+    return;\
+  }\
+\
+  /* Longer arrays: every shuffle does four swaps, pairing p[dst+j] with\
+     p[src+(j+2)%4] for j = 0..3. Both offsets are drawn with bound n-3,\
+     which keeps the +3 accesses inside p[0..n-1]. */\
+  for (k=0; k<nshuffles; k++) {\
+    dst = FPRFX ## randInRange(n-3);\
+    src = FPRFX ## randInRange(n-3);\
+    gk_SWAP(p[dst+0], p[src+2], held);\
+    gk_SWAP(p[dst+1], p[src+3], held);\
+    gk_SWAP(p[dst+2], p[src+0], held);\
+    gk_SWAP(p[dst+3], p[src+1], held);\
+  }\
+}\
+\
+\
+/*************************************************************************/\
+/*! Randomly permutes the elements of an array p[]. \
+    flag == 1, p[i] = i prior to permutation, \
+    flag == 0, p[] is not initialized. */\
+/**************************************************************************/\
+void FPRFX ## randArrayPermuteFine(RNGT n, VALT *p, int flag)\
+{\
+  RNGT pos, pick;\
+  VALT held;\
+\
+  if (flag == 1)  /* start from the identity permutation */\
+    for (pos=0; pos<n; pos++)\
+      p[pos] = (VALT)pos;\
+\
+  /* Swap each position, in order, with one drawn by randInRange(n). */\
+  for (pos=0; pos<n; pos++) {\
+    pick = FPRFX ## randInRange(n);\
+    gk_SWAP(p[pos], p[pick], held);\
+  }\
+}\
+
+/* Declares the functions defined by a GK_MKRANDOM instantiation with the same FPRFX, RNGT and VALT. */
+#define GK_MKRANDOM_PROTO(FPRFX, RNGT, VALT)\
+  void FPRFX ## srand(RNGT seed); \
+  RNGT FPRFX ## rand(); \
+  RNGT FPRFX ## randInRange(RNGT max); \
+  void FPRFX ## randArrayPermute(RNGT n, VALT *p, RNGT nshuffles, int flag);\
+  void FPRFX ## randArrayPermuteFine(RNGT n, VALT *p, int flag);\
+
+
+#endif
diff --git a/gk_mksort.h b/gk_mksort.h
new file mode 100644
index 0000000..48674db
--- /dev/null
+++ b/gk_mksort.h
@@ -0,0 +1,271 @@
+/*!
+\file  gk_mksort.h
+\brief Templates for the qsort routine
+
+\date   Started 3/28/07
+\author George
+\version\verbatim $Id: gk_mksort.h 21051 2017-05-25 04:36:14Z karypis $ \endverbatim
+*/
+
+
+#ifndef _GK_MKSORT_H_
+#define _GK_MKSORT_H_
+
+/* Adopted from GNU glibc by Mjt.
+ * See stdlib/qsort.c in glibc */
+
+/* Copyright (C) 1991, 1992, 1996, 1997, 1999 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Written by Douglas C. Schmidt (schmidt@ics.uci.edu).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+/* in-line qsort implementation.  Differs from the traditional qsort() routine
+ * in that it is a macro, not a function, and instead of passing the address
+ * of a comparison routine to the function, it is possible to inline the
+ * comparison routine, thus speeding up sorting a lot.
+ *
+ * Usage:
+ *  #include "gk_mksort.h"
+ *  #define islt(a,b) (strcmp((*a),(*b))<0)
+ *  char *arr[];
+ *  int n;
+ *  GK_MKQSORT(char*, arr, n, islt);
+ *
+ * The "prototype" and 4 arguments are:
+ *  GK_MKQSORT(TYPE,BASE,NELT,ISLT)
+ *  1) type of each element, TYPE,
+ *  2) address of the beginning of the array, of type TYPE*,
+ *  3) number of elements in the array, and
+ *  4) comparison routine.
+ * Array pointer and number of elements are referenced only once.
+ * This is similar to a call
+ *  qsort(BASE,NELT,sizeof(TYPE),ISLT)
+ * with the difference in the last parameter.
+ * Note the islt macro/routine (it receives pointers to two elements):
+ * the only condition of interest is whether one element is less than
+ * another, no other conditions (greater than, equal to etc) are tested.
+ * So, for example, to define integer sort, use:
+ *  #define islt(a,b) ((*a)<(*b))
+ *  GK_MKQSORT(int, arr, n, islt)
+ *
+ * The macro is meant to form the body of a dedicated sorting function (see
+ * the examples below).  This variant executes a bare `return' for an empty
+ * array and defines the label _jump_over, so it belongs in a function that
+ * returns void and can be expanded only once per function.  Note that
+ * the macro expands to quite some code (compiled size of int qsort on x86
+ * is about 700..800 bytes).
+ *
+ * Using this macro directly it isn't possible to implement traditional
+ * qsort() routine, because the macro assumes sizeof(element) == sizeof(TYPE),
+ * while qsort() allows element size to be different.
+ *
+ * Several ready-to-use examples:
+ *
+ * Sorting array of integers:
+ * void int_qsort(int *arr, unsigned n) {
+ * #define int_lt(a,b) ((*a)<(*b))
+ *   GK_MKQSORT(int, arr, n, int_lt);
+ * }
+ *
+ * Sorting array of string pointers:
+ * void str_qsort(char *arr[], unsigned n) {
+ * #define str_lt(a,b) (strcmp((*a),(*b)) < 0)
+ *   GK_MKQSORT(char*, arr, n, str_lt);
+ * }
+ *
+ * Sorting array of structures:
+ *
+ * struct elt {
+ *   int key;
+ *   ...
+ * };
+ * void elt_qsort(struct elt *arr, unsigned n) {
+ * #define elt_lt(a,b) ((a)->key < (b)->key)
+ *  GK_MKQSORT(struct elt, arr, n, elt_lt);
+ * }
+ *
+ * And so on.
+ */
+
+/* Swap the two items pointed to by a and b, using t as the temporary. a and b are expanded without parentheses, so pass plain pointer names. */
+#define _GKQSORT_SWAP(a, b, t) ((void)((t = *a), (*a = *b), (*b = t)))
+
+/* Discontinue quicksort algorithm when partition gets below this size; such partitions are finished by the final insertion sort. */
+#define _GKQSORT_MAX_THRESH 8
+
+/* The next 4 #defines implement a very fast in-line stack of pending partitions: PUSH/POP step the `top' cursor, and NOT_EMPTY expects locals named _stack and _top. */
+#define _GKQSORT_STACK_SIZE	    (8 * sizeof(size_t))
+#define _GKQSORT_PUSH(top, low, high) (((top->_lo = (low)), (top->_hi = (high)), ++top))
+#define	_GKQSORT_POP(low, high, top)  ((--top, (low = top->_lo), (high = top->_hi)))
+#define	_GKQSORT_STACK_NOT_EMPTY	    (_stack < _top)
+
+/* GK_MKQSORT(TYPE, BASE, NELT, LT): sorts, in place, the NELT elements of type TYPE that start at BASE,
+ * ordering them by LT(a, b), which is handed two element pointers. Expands to one brace-enclosed block. */
+#define GK_MKQSORT(GKQSORT_TYPE,GKQSORT_BASE,GKQSORT_NELT,GKQSORT_LT)   \
+{									\
+  GKQSORT_TYPE *const _base = (GKQSORT_BASE);				\
+  const size_t _elems = (GKQSORT_NELT);					\
+  GKQSORT_TYPE _hold;							\
+  /* Nothing to sort. The bare return needs an enclosing void function. */ \
+  if (_elems < 1)                                                      \
+    return;                                                             \
+                                                                        \
+  /* Don't declare two variables of type GKQSORT_TYPE in a single	\
+   * statement: eg `TYPE a, b;', in case if TYPE is a pointer,		\
+   * expands to `type* a, b;' which isn't what we want.			\
+   */									\
+  /* Quicksort pass; entered only for more than _GKQSORT_MAX_THRESH elements. */ \
+  if (_elems > _GKQSORT_MAX_THRESH) {					\
+    GKQSORT_TYPE *_lo = _base;						\
+    GKQSORT_TYPE *_hi = _lo + _elems - 1;				\
+    struct {								\
+      GKQSORT_TYPE *_hi; GKQSORT_TYPE *_lo;				\
+    } _stack[_GKQSORT_STACK_SIZE], *_top = _stack + 1;			\
+    /* _top starts one above _stack; the loop ends when a pop brings it back down to _stack. */ \
+    while (_GKQSORT_STACK_NOT_EMPTY) {					\
+      GKQSORT_TYPE *_left_ptr; GKQSORT_TYPE *_right_ptr;		\
+									\
+      /* Select median value from among LO, MID, and HI. Rearrange	\
+         LO and HI so the three values are sorted. This lowers the	\
+         probability of picking a pathological pivot value and		\
+         skips a comparison for both the LEFT_PTR and RIGHT_PTR in	\
+         the while loops. */						\
+									\
+      GKQSORT_TYPE *_mid = _lo + ((_hi - _lo) >> 1);			\
+									\
+      if (GKQSORT_LT (_mid, _lo))					\
+        _GKQSORT_SWAP (_mid, _lo, _hold);				\
+      if (GKQSORT_LT (_hi, _mid))					\
+        _GKQSORT_SWAP (_mid, _hi, _hold);				\
+      else								\
+        goto _jump_over;						\
+      if (GKQSORT_LT (_mid, _lo))					\
+        _GKQSORT_SWAP (_mid, _lo, _hold);				\
+  _jump_over:;								\
+									\
+      _left_ptr  = _lo + 1;						\
+      _right_ptr = _hi - 1;						\
+									\
+      /* Here's the famous ``collapse the walls'' section of quicksort.	\
+         Gotta like those tight inner loops!  They are the main reason	\
+         that this algorithm runs much faster than others. */		\
+      do {								\
+        while (GKQSORT_LT (_left_ptr, _mid))				\
+         ++_left_ptr;							\
+									\
+        while (GKQSORT_LT (_mid, _right_ptr))				\
+          --_right_ptr;							\
+									\
+        if (_left_ptr < _right_ptr) {					\
+          _GKQSORT_SWAP (_left_ptr, _right_ptr, _hold);			\
+          if (_mid == _left_ptr)					\
+            _mid = _right_ptr;						\
+          else if (_mid == _right_ptr)					\
+            _mid = _left_ptr;						\
+          ++_left_ptr;							\
+          --_right_ptr;							\
+        }								\
+        else if (_left_ptr == _right_ptr) {				\
+          ++_left_ptr;							\
+          --_right_ptr;							\
+          break;							\
+        }								\
+      } while (_left_ptr <= _right_ptr);				\
+									\
+     /* Set up pointers for next iteration.  First determine whether	\
+        left and right partitions are below the threshold size.  If so,	\
+        ignore one or both.  Otherwise, push the larger partition's	\
+        bounds on the stack and continue sorting the smaller one. */	\
+									\
+      if (_right_ptr - _lo <= _GKQSORT_MAX_THRESH) {			\
+        if (_hi - _left_ptr <= _GKQSORT_MAX_THRESH)			\
+          /* Ignore both small partitions. */				\
+          _GKQSORT_POP (_lo, _hi, _top);				\
+        else								\
+          /* Ignore small left partition. */				\
+          _lo = _left_ptr;						\
+      }									\
+      else if (_hi - _left_ptr <= _GKQSORT_MAX_THRESH)			\
+        /* Ignore small right partition. */				\
+        _hi = _right_ptr;						\
+      else if (_right_ptr - _lo > _hi - _left_ptr) {			\
+        /* Push larger left partition indices. */			\
+        _GKQSORT_PUSH (_top, _lo, _right_ptr);				\
+        _lo = _left_ptr;						\
+      }									\
+      else {								\
+        /* Push larger right partition indices. */			\
+        _GKQSORT_PUSH (_top, _left_ptr, _hi);				\
+        _hi = _right_ptr;						\
+      }									\
+    }									\
+  }									\
+									\
+  /* Once the BASE array is partially sorted by quicksort the rest	\
+     is completely sorted using insertion sort, since this is efficient	\
+     for partitions below MAX_THRESH size. BASE points to the		\
+     beginning of the array to sort, and END_PTR points at the very	\
+     last element in the array (*not* one beyond it!). */		\
+									\
+  {									\
+    GKQSORT_TYPE *const _end_ptr = _base + _elems - 1;			\
+    GKQSORT_TYPE *_tmp_ptr = _base;					\
+    register GKQSORT_TYPE *_run_ptr;					\
+    GKQSORT_TYPE *_thresh;						\
+									\
+    _thresh = _base + _GKQSORT_MAX_THRESH;				\
+    if (_thresh > _end_ptr)						\
+      _thresh = _end_ptr;						\
+									\
+    /* Find smallest element in first threshold and place it at the	\
+       array's beginning.  This is the smallest array element,		\
+       and the operation speeds up insertion sort's inner loop. */	\
+									\
+    for (_run_ptr = _tmp_ptr + 1; _run_ptr <= _thresh; ++_run_ptr)	\
+      if (GKQSORT_LT (_run_ptr, _tmp_ptr))				\
+        _tmp_ptr = _run_ptr;						\
+									\
+    if (_tmp_ptr != _base)						\
+      _GKQSORT_SWAP (_tmp_ptr, _base, _hold);				\
+									\
+    /* Insertion sort, running from left-hand-side			\
+     * up to right-hand-side.  */					\
+    /* The pre-increment below makes the first inserted element _base[2]. */ \
+    _run_ptr = _base + 1;						\
+    while (++_run_ptr <= _end_ptr) {					\
+      _tmp_ptr = _run_ptr - 1;						\
+      while (GKQSORT_LT (_run_ptr, _tmp_ptr))				\
+        --_tmp_ptr;							\
+									\
+      ++_tmp_ptr;							\
+      if (_tmp_ptr != _run_ptr) {					\
+        GKQSORT_TYPE *_trav = _run_ptr + 1;				\
+        while (--_trav >= _run_ptr) {					\
+          GKQSORT_TYPE *_hi; GKQSORT_TYPE *_lo;				\
+          _hold = *_trav;						\
+									\
+          for (_hi = _lo = _trav; --_lo >= _tmp_ptr; _hi = _lo)		\
+            *_hi = *_lo;						\
+          *_hi = _hold;							\
+        }								\
+      }									\
+    }									\
+  }									\
+									\
+}
+
+#endif
diff --git a/gk_mkutils.h b/gk_mkutils.h
new file mode 100644
index 0000000..a092f22
--- /dev/null
+++ b/gk_mkutils.h
@@ -0,0 +1,40 @@
+/*!
+\file  
+\brief Templates for various utility routines
+
+\date   Started 5/28/07
+\author George
+\version\verbatim $Id: gk_mkutils.h 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
+*/
+
+#ifndef _GK_MKUTILS_H_
+#define _GK_MKUTILS_H_
+
+
+#define GK_MKARRAY2CSR(PRFX, TYPE)\
+/*************************************************************************/\
+/*! The macro for gk_?array2csr() routine */\
+/**************************************************************************/\
+void PRFX ## array2csr(TYPE n, TYPE range, TYPE *array, TYPE *ptr, TYPE *ind)\
+{\
+  TYPE k;\
+\
+  /* Count how often each value occurs: ptr[v] = number of k with array[k] == v. */\
+  for (k=0; k<=range; k++)\
+    ptr[k] = 0;\
+  for (k=0; k<n; k++)\
+    ptr[array[k]] += 1;\
+\
+  /* NOTE(review): MAKECSR/SHIFTCSR are defined elsewhere; presumably counts to offsets and back - confirm. */\
+  MAKECSR(k, range, ptr);\
+  for (k=0; k<n; k++)\
+    ind[ptr[array[k]]++] = k;\
+  SHIFTCSR(k, range, ptr);\
+}
+
+/* Declares the array2csr function defined by a GK_MKARRAY2CSR instantiation with the same PRFX and TYPE. */
+#define GK_MKARRAY2CSR_PROTO(PRFX, TYPE)\
+  void PRFX ## array2csr(TYPE n, TYPE range, TYPE *array, TYPE *ptr, TYPE *ind);\
+
+
+#endif
diff --git a/gk_ms_inttypes.h b/gk_ms_inttypes.h
new file mode 100644
index 0000000..b89fc10
--- /dev/null
+++ b/gk_ms_inttypes.h
@@ -0,0 +1,301 @@
+// ISO C9x  compliant inttypes.h for Microsoft Visual Studio
+// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 
+// 
+//  Copyright (c) 2006 Alexander Chemeris
+// 
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// 
+//   1. Redistributions of source code must retain the above copyright notice,
+//      this list of conditions and the following disclaimer.
+// 
+//   2. Redistributions in binary form must reproduce the above copyright
+//      notice, this list of conditions and the following disclaimer in the
+//      documentation and/or other materials provided with the distribution.
+// 
+//   3. The name of the author may be used to endorse or promote products
+//      derived from this software without specific prior written permission.
+// 
+// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
+// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// 
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef _MSC_VER // [
+#error "Use this header only with Microsoft Visual C++ compilers!"
+#endif // _MSC_VER ]
+
+#ifndef _MSC_INTTYPES_H_ // [
+#define _MSC_INTTYPES_H_
+
+#if _MSC_VER > 1000
+#pragma once
+#endif
+
+#include "gk_ms_stdint.h"
+
+// 7.8 Format conversion of integer types
+// imaxdiv_t: the quotient/remainder pair returned by imaxdiv() in this header.
+typedef struct {
+   intmax_t quot;   // quotient  (numer / denom)
+   intmax_t rem;    // remainder (numer % denom)
+} imaxdiv_t;
+
+// 7.8.1 Macros for format specifiers
+
+// The fprintf macros for signed integers are:
+#define PRId8       "d"
+#define PRIi8       "i"
+#define PRIdLEAST8  "d"
+#define PRIiLEAST8  "i"
+#define PRIdFAST8   "d"
+#define PRIiFAST8   "i"
+
+#define PRId16       "hd"
+#define PRIi16       "hi"
+#define PRIdLEAST16  "hd"
+#define PRIiLEAST16  "hi"
+#define PRIdFAST16   "hd"
+#define PRIiFAST16   "hi"
+
+#define PRId32       "I32d"
+#define PRIi32       "I32i"
+#define PRIdLEAST32  "I32d"
+#define PRIiLEAST32  "I32i"
+#define PRIdFAST32   "I32d"
+#define PRIiFAST32   "I32i"
+
+#define PRId64       "I64d"
+#define PRIi64       "I64i"
+#define PRIdLEAST64  "I64d"
+#define PRIiLEAST64  "I64i"
+#define PRIdFAST64   "I64d"
+#define PRIiFAST64   "I64i"
+
+#define PRIdMAX     "I64d"
+#define PRIiMAX     "I64i"
+
+#define PRIdPTR     "Id"
+#define PRIiPTR     "Ii"
+
+// The fprintf macros for unsigned integers are:
+#define PRIo8       "o"
+#define PRIu8       "u"
+#define PRIx8       "x"
+#define PRIX8       "X"
+#define PRIoLEAST8  "o"
+#define PRIuLEAST8  "u"
+#define PRIxLEAST8  "x"
+#define PRIXLEAST8  "X"
+#define PRIoFAST8   "o"
+#define PRIuFAST8   "u"
+#define PRIxFAST8   "x"
+#define PRIXFAST8   "X"
+
+#define PRIo16       "ho"
+#define PRIu16       "hu"
+#define PRIx16       "hx"
+#define PRIX16       "hX"
+#define PRIoLEAST16  "ho"
+#define PRIuLEAST16  "hu"
+#define PRIxLEAST16  "hx"
+#define PRIXLEAST16  "hX"
+#define PRIoFAST16   "ho"
+#define PRIuFAST16   "hu"
+#define PRIxFAST16   "hx"
+#define PRIXFAST16   "hX"
+
+#define PRIo32       "I32o"
+#define PRIu32       "I32u"
+#define PRIx32       "I32x"
+#define PRIX32       "I32X"
+#define PRIoLEAST32  "I32o"
+#define PRIuLEAST32  "I32u"
+#define PRIxLEAST32  "I32x"
+#define PRIXLEAST32  "I32X"
+#define PRIoFAST32   "I32o"
+#define PRIuFAST32   "I32u"
+#define PRIxFAST32   "I32x"
+#define PRIXFAST32   "I32X"
+
+#define PRIo64       "I64o"
+#define PRIu64       "I64u"
+#define PRIx64       "I64x"
+#define PRIX64       "I64X"
+#define PRIoLEAST64  "I64o"
+#define PRIuLEAST64  "I64u"
+#define PRIxLEAST64  "I64x"
+#define PRIXLEAST64  "I64X"
+#define PRIoFAST64   "I64o"
+#define PRIuFAST64   "I64u"
+#define PRIxFAST64   "I64x"
+#define PRIXFAST64   "I64X"
+
+#define PRIoMAX     "I64o"
+#define PRIuMAX     "I64u"
+#define PRIxMAX     "I64x"
+#define PRIXMAX     "I64X"
+
+#define PRIoPTR     "Io"
+#define PRIuPTR     "Iu"
+#define PRIxPTR     "Ix"
+#define PRIXPTR     "IX"
+
+// The fscanf macros for signed integers are (NOTE(review): the 8-bit entries are plain "d"/"i", the int conversions, with no length modifier -- confirm they are never used to scan into an 8-bit object):
+#define SCNd8       "d"
+#define SCNi8       "i"
+#define SCNdLEAST8  "d"
+#define SCNiLEAST8  "i"
+#define SCNdFAST8   "d"
+#define SCNiFAST8   "i"
+
+#define SCNd16       "hd"
+#define SCNi16       "hi"
+#define SCNdLEAST16  "hd"
+#define SCNiLEAST16  "hi"
+#define SCNdFAST16   "hd"
+#define SCNiFAST16   "hi"
+
+#define SCNd32       "ld"
+#define SCNi32       "li"
+#define SCNdLEAST32  "ld"
+#define SCNiLEAST32  "li"
+#define SCNdFAST32   "ld"
+#define SCNiFAST32   "li"
+
+#define SCNd64       "I64d"
+#define SCNi64       "I64i"
+#define SCNdLEAST64  "I64d"
+#define SCNiLEAST64  "I64i"
+#define SCNdFAST64   "I64d"
+#define SCNiFAST64   "I64i"
+
+#define SCNdMAX     "I64d"
+#define SCNiMAX     "I64i"
+
+#ifdef _WIN64 // [
+#  define SCNdPTR     "I64d"
+#  define SCNiPTR     "I64i"
+#else  // _WIN64 ][
+#  define SCNdPTR     "ld"
+#  define SCNiPTR     "li"
+#endif  // _WIN64 ]
+
+// The fscanf macros for unsigned integers are (NOTE(review): the 8-bit entries are plain "o"/"u"/"x"/"X", the unsigned int conversions, with no length modifier -- confirm they are never used to scan into an 8-bit object):
+#define SCNo8       "o"
+#define SCNu8       "u"
+#define SCNx8       "x"
+#define SCNX8       "X"
+#define SCNoLEAST8  "o"
+#define SCNuLEAST8  "u"
+#define SCNxLEAST8  "x"
+#define SCNXLEAST8  "X"
+#define SCNoFAST8   "o"
+#define SCNuFAST8   "u"
+#define SCNxFAST8   "x"
+#define SCNXFAST8   "X"
+
+#define SCNo16       "ho"
+#define SCNu16       "hu"
+#define SCNx16       "hx"
+#define SCNX16       "hX"
+#define SCNoLEAST16  "ho"
+#define SCNuLEAST16  "hu"
+#define SCNxLEAST16  "hx"
+#define SCNXLEAST16  "hX"
+#define SCNoFAST16   "ho"
+#define SCNuFAST16   "hu"
+#define SCNxFAST16   "hx"
+#define SCNXFAST16   "hX"
+
+#define SCNo32       "lo"
+#define SCNu32       "lu"
+#define SCNx32       "lx"
+#define SCNX32       "lX"
+#define SCNoLEAST32  "lo"
+#define SCNuLEAST32  "lu"
+#define SCNxLEAST32  "lx"
+#define SCNXLEAST32  "lX"
+#define SCNoFAST32   "lo"
+#define SCNuFAST32   "lu"
+#define SCNxFAST32   "lx"
+#define SCNXFAST32   "lX"
+
+#define SCNo64       "I64o"
+#define SCNu64       "I64u"
+#define SCNx64       "I64x"
+#define SCNX64       "I64X"
+#define SCNoLEAST64  "I64o"
+#define SCNuLEAST64  "I64u"
+#define SCNxLEAST64  "I64x"
+#define SCNXLEAST64  "I64X"
+#define SCNoFAST64   "I64o"
+#define SCNuFAST64   "I64u"
+#define SCNxFAST64   "I64x"
+#define SCNXFAST64   "I64X"
+
+#define SCNoMAX     "I64o"
+#define SCNuMAX     "I64u"
+#define SCNxMAX     "I64x"
+#define SCNXMAX     "I64X"
+
+#ifdef _WIN64 // [
+#  define SCNoPTR     "I64o"
+#  define SCNuPTR     "I64u"
+#  define SCNxPTR     "I64x"
+#  define SCNXPTR     "I64X"
+#else  // _WIN64 ][
+#  define SCNoPTR     "lo"
+#  define SCNuPTR     "lu"
+#  define SCNxPTR     "lx"
+#  define SCNXPTR     "lX"
+#endif  // _WIN64 ]
+
+// 7.8.2 Functions for greatest-width integer types
+
+// 7.8.2.1 The imaxabs function
+#define imaxabs _abs64
+
+// 7.8.2.2 The imaxdiv function
+
+// Returns numer / denom and numer % denom together in one imaxdiv_t. This is a modified
+// version of the div() function from Microsoft's div.c found in %MSVC.NET%\crt\src\div.c
+#ifdef STATIC_IMAXDIV // [
+static
+#else // STATIC_IMAXDIV ][
+_inline
+#endif // STATIC_IMAXDIV ]
+imaxdiv_t __cdecl imaxdiv(intmax_t numer, intmax_t denom)
+{
+   imaxdiv_t result;
+
+   result.quot = numer / denom;
+   result.rem = numer % denom;
+
+   if (numer < 0 && result.rem > 0) {
+      // negative numer but positive remainder: move one denom from rem into quot
+      ++result.quot;
+      result.rem -= denom;
+   }
+
+   return result;
+}
+
+// 7.8.2.3 The strtoimax and strtoumax functions
+#define strtoimax _strtoi64
+#define strtoumax _strtoui64
+
+// 7.8.2.4 The wcstoimax and wcstoumax functions
+#define wcstoimax _wcstoi64
+#define wcstoumax _wcstoui64
+
+
+#endif // _MSC_INTTYPES_H_ ]
diff --git a/gk_ms_stat.h b/gk_ms_stat.h
new file mode 100644
index 0000000..a1ef6fa
--- /dev/null
+++ b/gk_ms_stat.h
@@ -0,0 +1,22 @@
+#ifndef _MSC_VER // [
+#error "Use this header only with Microsoft Visual C++ compilers!"
+#endif // _MSC_VER ]
+
+#ifndef _MS_STAT_H_
+#define _MS_STAT_H_
+
+#if _MSC_VER > 1000
+#pragma once
+#endif
+
+#include <sys/stat.h>
+/* Test macros for file types: S_ISxxx(mode) is true when the S_IFMT bits
+   of mode equal the S_IFxxx mask.  */
+#define __S_ISTYPE(mode, mask)  (((mode) & S_IFMT) == (mask))
+
+#define S_ISDIR(mode)    __S_ISTYPE((mode), S_IFDIR)
+#define S_ISCHR(mode)    __S_ISTYPE((mode), S_IFCHR)
+#define S_ISBLK(mode)    __S_ISTYPE((mode), S_IFBLK) /* NOTE(review): S_IFBLK is not defined here; confirm <sys/stat.h> provides it */
+#define S_ISREG(mode)    __S_ISTYPE((mode), S_IFREG)
+
+#endif /* _MS_STAT_H_ */
diff --git a/gk_ms_stdint.h b/gk_ms_stdint.h
new file mode 100644
index 0000000..7e200dc
--- /dev/null
+++ b/gk_ms_stdint.h
@@ -0,0 +1,222 @@
+// ISO C9x  compliant stdint.h for Microsoft Visual Studio
+// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 
+// 
+//  Copyright (c) 2006 Alexander Chemeris
+// 
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// 
+//   1. Redistributions of source code must retain the above copyright notice,
+//      this list of conditions and the following disclaimer.
+// 
+//   2. Redistributions in binary form must reproduce the above copyright
+//      notice, this list of conditions and the following disclaimer in the
+//      documentation and/or other materials provided with the distribution.
+// 
+//   3. The name of the author may be used to endorse or promote products
+//      derived from this software without specific prior written permission.
+// 
+// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
+// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// 
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef _MSC_VER // [
+#error "Use this header only with Microsoft Visual C++ compilers!"
+#endif // _MSC_VER ]
+
+#ifndef _MSC_STDINT_H_ // [
+#define _MSC_STDINT_H_
+
+#if _MSC_VER > 1000
+#pragma once
+#endif
+
+#include <limits.h>
+
+// For Visual Studio 6 in C++ mode wrap <wchar.h> include with 'extern "C++" {}'
+// or compiler give many errors like this:
+//   error C2733: second C linkage of overloaded function 'wmemchr' not allowed
+#if (_MSC_VER < 1300) && defined(__cplusplus)
+   extern "C++" {
+#endif 
+#     include <wchar.h>
+#if (_MSC_VER < 1300) && defined(__cplusplus)
+   }
+#endif
+
+// 7.18.1 Integer types
+
+// 7.18.1.1 Exact-width integer types
+typedef __int8            int8_t;
+typedef __int16           int16_t;
+typedef __int32           int32_t;
+typedef __int64           int64_t;
+typedef unsigned __int8   uint8_t;
+typedef unsigned __int16  uint16_t;
+typedef unsigned __int32  uint32_t;
+typedef unsigned __int64  uint64_t;
+
+// 7.18.1.2 Minimum-width integer types
+typedef int8_t    int_least8_t;
+typedef int16_t   int_least16_t;
+typedef int32_t   int_least32_t;
+typedef int64_t   int_least64_t;
+typedef uint8_t   uint_least8_t;
+typedef uint16_t  uint_least16_t;
+typedef uint32_t  uint_least32_t;
+typedef uint64_t  uint_least64_t;
+
+// 7.18.1.3 Fastest minimum-width integer types
+typedef int8_t    int_fast8_t;
+typedef int16_t   int_fast16_t;
+typedef int32_t   int_fast32_t;
+typedef int64_t   int_fast64_t;
+typedef uint8_t   uint_fast8_t;
+typedef uint16_t  uint_fast16_t;
+typedef uint32_t  uint_fast32_t;
+typedef uint64_t  uint_fast64_t;
+
+// 7.18.1.4 Integer types capable of holding object pointers
+#ifdef _WIN64 // [
+   typedef __int64           intptr_t;
+   typedef unsigned __int64  uintptr_t;
+#else // _WIN64 ][
+   typedef int               intptr_t;
+   typedef unsigned int      uintptr_t;
+#endif // _WIN64 ]
+
+// 7.18.1.5 Greatest-width integer types
+typedef int64_t   intmax_t;
+typedef uint64_t  uintmax_t;
+
+
+// 7.18.2 Limits of specified-width integer types
+
+#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [   See footnote 220 at page 257 and footnote 221 at page 259
+
+// 7.18.2.1 Limits of exact-width integer types
+#define INT8_MIN     ((int8_t)_I8_MIN)
+#define INT8_MAX     _I8_MAX
+#define INT16_MIN    ((int16_t)_I16_MIN)
+#define INT16_MAX    _I16_MAX
+#define INT32_MIN    ((int32_t)_I32_MIN)
+#define INT32_MAX    _I32_MAX
+#define INT64_MIN    ((int64_t)_I64_MIN)
+#define INT64_MAX    _I64_MAX
+#define UINT8_MAX    _UI8_MAX
+#define UINT16_MAX   _UI16_MAX
+#define UINT32_MAX   _UI32_MAX
+#define UINT64_MAX   _UI64_MAX
+
+// 7.18.2.2 Limits of minimum-width integer types
+#define INT_LEAST8_MIN    INT8_MIN
+#define INT_LEAST8_MAX    INT8_MAX
+#define INT_LEAST16_MIN   INT16_MIN
+#define INT_LEAST16_MAX   INT16_MAX
+#define INT_LEAST32_MIN   INT32_MIN
+#define INT_LEAST32_MAX   INT32_MAX
+#define INT_LEAST64_MIN   INT64_MIN
+#define INT_LEAST64_MAX   INT64_MAX
+#define UINT_LEAST8_MAX   UINT8_MAX
+#define UINT_LEAST16_MAX  UINT16_MAX
+#define UINT_LEAST32_MAX  UINT32_MAX
+#define UINT_LEAST64_MAX  UINT64_MAX
+
+// 7.18.2.3 Limits of fastest minimum-width integer types
+#define INT_FAST8_MIN    INT8_MIN
+#define INT_FAST8_MAX    INT8_MAX
+#define INT_FAST16_MIN   INT16_MIN
+#define INT_FAST16_MAX   INT16_MAX
+#define INT_FAST32_MIN   INT32_MIN
+#define INT_FAST32_MAX   INT32_MAX
+#define INT_FAST64_MIN   INT64_MIN
+#define INT_FAST64_MAX   INT64_MAX
+#define UINT_FAST8_MAX   UINT8_MAX
+#define UINT_FAST16_MAX  UINT16_MAX
+#define UINT_FAST32_MAX  UINT32_MAX
+#define UINT_FAST64_MAX  UINT64_MAX
+
+// 7.18.2.4 Limits of integer types capable of holding object pointers
+#ifdef _WIN64 // [
+#  define INTPTR_MIN   INT64_MIN
+#  define INTPTR_MAX   INT64_MAX
+#  define UINTPTR_MAX  UINT64_MAX
+#else // _WIN64 ][
+#  define INTPTR_MIN   INT32_MIN
+#  define INTPTR_MAX   INT32_MAX
+#  define UINTPTR_MAX  UINT32_MAX
+#endif // _WIN64 ]
+
+// 7.18.2.5 Limits of greatest-width integer types
+#define INTMAX_MIN   INT64_MIN
+#define INTMAX_MAX   INT64_MAX
+#define UINTMAX_MAX  UINT64_MAX
+
+// 7.18.3 Limits of other integer types
+
+#ifdef _WIN64 // [
+#  define PTRDIFF_MIN  _I64_MIN
+#  define PTRDIFF_MAX  _I64_MAX
+#else  // _WIN64 ][
+#  define PTRDIFF_MIN  _I32_MIN
+#  define PTRDIFF_MAX  _I32_MAX
+#endif  // _WIN64 ]
+
+#define SIG_ATOMIC_MIN  INT_MIN
+#define SIG_ATOMIC_MAX  INT_MAX
+
+#ifndef SIZE_MAX // [
+#  ifdef _WIN64 // [
+#     define SIZE_MAX  _UI64_MAX
+#  else // _WIN64 ][
+#     define SIZE_MAX  _UI32_MAX
+#  endif // _WIN64 ]
+#endif // SIZE_MAX ]
+
+// WCHAR_MIN and WCHAR_MAX are also defined in <wchar.h>
+#ifndef WCHAR_MIN // [
+#  define WCHAR_MIN  0
+#endif  // WCHAR_MIN ]
+#ifndef WCHAR_MAX // [
+#  define WCHAR_MAX  _UI16_MAX
+#endif  // WCHAR_MAX ]
+
+#define WINT_MIN  0
+#define WINT_MAX  _UI16_MAX
+
+#endif // __STDC_LIMIT_MACROS ]
+
+
+// 7.18.4 Macros for integer constants
+
+#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [   See footnote 224 at page 260
+
+// 7.18.4.1 Macros for minimum-width integer constants
+
+#define INT8_C(val)  val##i8
+#define INT16_C(val) val##i16
+#define INT32_C(val) val##i32
+#define INT64_C(val) val##i64
+
+#define UINT8_C(val)  val##ui8
+#define UINT16_C(val) val##ui16
+#define UINT32_C(val) val##ui32
+#define UINT64_C(val) val##ui64
+
+// 7.18.4.2 Macros for greatest-width integer constants
+#define INTMAX_C   INT64_C
+#define UINTMAX_C  UINT64_C
+
+#endif // __STDC_CONSTANT_MACROS ]
+
+
+#endif // _MSC_STDINT_H_ ]
diff --git a/gk_proto.h b/gk_proto.h
new file mode 100644
index 0000000..6fd6bd4
--- /dev/null
+++ b/gk_proto.h
@@ -0,0 +1,426 @@
+/*!
+\file gk_proto.h
+\brief This file contains function prototypes
+
+\date   Started 3/27/2007
+\author George
+\version\verbatim $Id: gk_proto.h 22010 2018-05-14 20:20:26Z karypis $ \endverbatim
+*/
+
+#ifndef _GK_PROTO_H_
+#define _GK_PROTO_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*-------------------------------------------------------------
+ * blas.c 
+ *-------------------------------------------------------------*/
+GK_MKBLAS_PROTO(gk_c,   char,     int)
+GK_MKBLAS_PROTO(gk_i,   int,      int)
+GK_MKBLAS_PROTO(gk_i8,  int8_t,   int8_t)
+GK_MKBLAS_PROTO(gk_i16, int16_t,  int16_t)
+GK_MKBLAS_PROTO(gk_i32, int32_t,  int32_t)
+GK_MKBLAS_PROTO(gk_i64, int64_t,  int64_t)
+GK_MKBLAS_PROTO(gk_z,   ssize_t,  ssize_t)
+GK_MKBLAS_PROTO(gk_zu,  size_t,   size_t)
+GK_MKBLAS_PROTO(gk_f,   float,    float)
+GK_MKBLAS_PROTO(gk_d,   double,   double)
+GK_MKBLAS_PROTO(gk_idx, gk_idx_t, gk_idx_t)
+
+
+
+
+/*-------------------------------------------------------------
+ * io.c
+ *-------------------------------------------------------------*/
+FILE *gk_fopen(char *, char *, const char *);
+void gk_fclose(FILE *);
+ssize_t gk_read(int fd, void *vbuf, size_t count);
+ssize_t gk_write(int fd, void *vbuf, size_t count);
+ssize_t gk_getline(char **lineptr, size_t *n, FILE *stream);
+char **gk_readfile(char *fname, size_t *r_nlines);
+int32_t *gk_i32readfile(char *fname, size_t *r_nlines);
+int64_t *gk_i64readfile(char *fname, size_t *r_nlines);
+ssize_t *gk_zreadfile(char *fname, size_t *r_nlines);
+char *gk_creadfilebin(char *fname, size_t *r_nelmnts);
+size_t gk_cwritefilebin(char *fname, size_t n, char *a);
+int32_t *gk_i32readfilebin(char *fname, size_t *r_nelmnts);
+size_t gk_i32writefilebin(char *fname, size_t n, int32_t *a);
+int64_t *gk_i64readfilebin(char *fname, size_t *r_nelmnts);
+size_t gk_i64writefilebin(char *fname, size_t n, int64_t *a);
+ssize_t *gk_zreadfilebin(char *fname, size_t *r_nelmnts);
+size_t gk_zwritefilebin(char *fname, size_t n, ssize_t *a);
+float *gk_freadfilebin(char *fname, size_t *r_nelmnts);
+size_t gk_fwritefilebin(char *fname, size_t n, float *a);
+double *gk_dreadfilebin(char *fname, size_t *r_nelmnts);
+size_t gk_dwritefilebin(char *fname, size_t n, double *a);
+
+
+
+
+/*-------------------------------------------------------------
+ * fs.c
+ *-------------------------------------------------------------*/
+int gk_fexists(char *);
+int gk_dexists(char *);
+ssize_t gk_getfsize(char *);
+void gk_getfilestats(char *fname, size_t *r_nlines, size_t *r_ntokens, 
+          size_t *r_max_nlntokens, size_t *r_nbytes);
+char *gk_getbasename(char *path);
+char *gk_getextname(char *path);
+char *gk_getfilename(char *path);
+char *gk_getpathname(char *path);
+int gk_mkpath(char *);
+int gk_rmpath(char *);
+
+
+
+/*-------------------------------------------------------------
+ * memory.c
+ *-------------------------------------------------------------*/
+GK_MKALLOC_PROTO(gk_c,    char)
+GK_MKALLOC_PROTO(gk_i,    int)
+GK_MKALLOC_PROTO(gk_i8,   int8_t)
+GK_MKALLOC_PROTO(gk_i16,  int16_t)
+GK_MKALLOC_PROTO(gk_i32,  int32_t)
+GK_MKALLOC_PROTO(gk_i64,  int64_t)
+GK_MKALLOC_PROTO(gk_ui8,  uint8_t)
+GK_MKALLOC_PROTO(gk_ui16, uint16_t)
+GK_MKALLOC_PROTO(gk_ui32, uint32_t)
+GK_MKALLOC_PROTO(gk_ui64, uint64_t)
+GK_MKALLOC_PROTO(gk_z,    ssize_t)
+GK_MKALLOC_PROTO(gk_zu,   size_t)
+GK_MKALLOC_PROTO(gk_f,    float)
+GK_MKALLOC_PROTO(gk_d,    double)
+GK_MKALLOC_PROTO(gk_idx,  gk_idx_t)
+
+GK_MKALLOC_PROTO(gk_ckv,   gk_ckv_t)
+GK_MKALLOC_PROTO(gk_ikv,   gk_ikv_t)
+GK_MKALLOC_PROTO(gk_i8kv,  gk_i8kv_t)
+GK_MKALLOC_PROTO(gk_i16kv, gk_i16kv_t)
+GK_MKALLOC_PROTO(gk_i32kv, gk_i32kv_t)
+GK_MKALLOC_PROTO(gk_i64kv, gk_i64kv_t)
+GK_MKALLOC_PROTO(gk_zkv,   gk_zkv_t)
+GK_MKALLOC_PROTO(gk_zukv,  gk_zukv_t)
+GK_MKALLOC_PROTO(gk_fkv,   gk_fkv_t)
+GK_MKALLOC_PROTO(gk_dkv,   gk_dkv_t)
+GK_MKALLOC_PROTO(gk_skv,   gk_skv_t)
+GK_MKALLOC_PROTO(gk_idxkv, gk_idxkv_t)
+
+void   gk_AllocMatrix(void ***, size_t, size_t , size_t);
+void   gk_FreeMatrix(void ***, size_t, size_t);
+int    gk_malloc_init(void);        /* (void): a real prototype, so stray arguments are diagnosed in C */
+void   gk_malloc_cleanup(int showstats);
+void  *gk_malloc(size_t nbytes, char *msg);
+void  *gk_realloc(void *oldptr, size_t nbytes, char *msg);
+void   gk_free(void **ptr1,...);
+size_t gk_GetCurMemoryUsed(void);
+size_t gk_GetMaxMemoryUsed(void);
+void   gk_GetVMInfo(size_t *vmsize, size_t *vmrss);
+size_t gk_GetProcVmPeak(void);
+
+
+
+/*-------------------------------------------------------------
+ * seq.c
+ *-------------------------------------------------------------*/
+gk_seq_t *gk_seq_ReadGKMODPSSM(char *file_name);
+gk_i2cc2i_t *gk_i2cc2i_create_common(char *alphabet);
+void gk_seq_init(gk_seq_t *seq);
+
+
+
+/*-------------------------------------------------------------
+ * error.c
+ *-------------------------------------------------------------*/
+void gk_set_exit_on_error(int value);
+void errexit(char *,...);
+void gk_errexit(int signum, char *,...);
+int gk_sigtrap(void);               /* (void): a real prototype, so stray arguments are diagnosed in C */
+int gk_siguntrap(void);
+void gk_sigthrow(int signum);
+void gk_SetSignalHandlers(void);
+void gk_UnsetSignalHandlers(void);
+void gk_NonLocalExit_Handler(int signum);
+char *gk_strerror(int errnum);
+void PrintBackTrace(void);
+
+
+/*-------------------------------------------------------------
+ * util.c
+ *-------------------------------------------------------------*/
+void  gk_RandomPermute(size_t, int *, int);
+void  gk_array2csr(size_t n, size_t range, int *array, int *ptr, int *ind);
+int   gk_log2(int);
+int   gk_ispow2(int);
+float gk_flog2(float);
+
+
+/*-------------------------------------------------------------
+ * time.c
+ *-------------------------------------------------------------*/
+gk_wclock_t gk_WClockSeconds(void);
+double gk_CPUSeconds(void);
+
+/*-------------------------------------------------------------
+ * string.c
+ *-------------------------------------------------------------*/
+char   *gk_strchr_replace(char *str, char *fromlist, char *tolist);
+int     gk_strstr_replace(char *str, char *pattern, char *replacement, char *options, char **new_str);
+char   *gk_strtprune(char *, char *);
+char   *gk_strhprune(char *, char *);
+char   *gk_strtoupper(char *); 
+char   *gk_strtolower(char *); 
+char   *gk_strdup(char *orgstr);
+int     gk_strcasecmp(char *s1, char *s2);
+int     gk_strrcmp(char *s1, char *s2);
+char   *gk_time2str(time_t time);
+time_t  gk_str2time(char *str);
+int     gk_GetStringID(gk_StringMap_t *strmap, char *key);
+
+
+
+/*-------------------------------------------------------------
+ * sort.c 
+ *-------------------------------------------------------------*/
+void gk_csorti(size_t, char *);
+void gk_csortd(size_t, char *);
+void gk_isorti(size_t, int *);
+void gk_isortd(size_t, int *);
+void gk_i32sorti(size_t, int32_t *);
+void gk_i32sortd(size_t, int32_t *);
+void gk_i64sorti(size_t, int64_t *);
+void gk_i64sortd(size_t, int64_t *);
+void gk_ui32sorti(size_t, uint32_t *);
+void gk_ui32sortd(size_t, uint32_t *);
+void gk_ui64sorti(size_t, uint64_t *);
+void gk_ui64sortd(size_t, uint64_t *);
+void gk_fsorti(size_t, float *);
+void gk_fsortd(size_t, float *);
+void gk_dsorti(size_t, double *);
+void gk_dsortd(size_t, double *);
+void gk_idxsorti(size_t, gk_idx_t *);
+void gk_idxsortd(size_t, gk_idx_t *);
+void gk_ckvsorti(size_t, gk_ckv_t *);
+void gk_ckvsortd(size_t, gk_ckv_t *);
+void gk_ikvsorti(size_t, gk_ikv_t *);
+void gk_ikvsortd(size_t, gk_ikv_t *);
+void gk_i32kvsorti(size_t, gk_i32kv_t *);
+void gk_i32kvsortd(size_t, gk_i32kv_t *);
+void gk_i64kvsorti(size_t, gk_i64kv_t *);
+void gk_i64kvsortd(size_t, gk_i64kv_t *);
+void gk_zkvsorti(size_t, gk_zkv_t *);
+void gk_zkvsortd(size_t, gk_zkv_t *);
+void gk_zukvsorti(size_t, gk_zukv_t *);
+void gk_zukvsortd(size_t, gk_zukv_t *);
+void gk_fkvsorti(size_t, gk_fkv_t *);
+void gk_fkvsortd(size_t, gk_fkv_t *);
+void gk_dkvsorti(size_t, gk_dkv_t *);
+void gk_dkvsortd(size_t, gk_dkv_t *);
+void gk_skvsorti(size_t, gk_skv_t *);
+void gk_skvsortd(size_t, gk_skv_t *);
+void gk_idxkvsorti(size_t, gk_idxkv_t *);
+void gk_idxkvsortd(size_t, gk_idxkv_t *);
+
+
+/*-------------------------------------------------------------
+ * Selection routines
+ *-------------------------------------------------------------*/
+int  gk_dfkvkselect(size_t, int, gk_fkv_t *);
+int  gk_ifkvkselect(size_t, int, gk_fkv_t *);
+
+
+/*-------------------------------------------------------------
+ * Priority queue 
+ *-------------------------------------------------------------*/
+GK_MKPQUEUE_PROTO(gk_ipq,   gk_ipq_t,   int,      gk_idx_t)
+GK_MKPQUEUE_PROTO(gk_i32pq, gk_i32pq_t, int32_t,  gk_idx_t)
+GK_MKPQUEUE_PROTO(gk_i64pq, gk_i64pq_t, int64_t,  gk_idx_t)
+GK_MKPQUEUE_PROTO(gk_fpq,   gk_fpq_t,   float,    gk_idx_t)
+GK_MKPQUEUE_PROTO(gk_dpq,   gk_dpq_t,   double,   gk_idx_t)
+GK_MKPQUEUE_PROTO(gk_idxpq, gk_idxpq_t, gk_idx_t, gk_idx_t)
+
+
+/*-------------------------------------------------------------
+ * HTable routines
+ *-------------------------------------------------------------*/
+gk_HTable_t *HTable_Create(int nelements);
+void         HTable_Reset(gk_HTable_t *htable);
+void         HTable_Resize(gk_HTable_t *htable, int nelements);
+void         HTable_Insert(gk_HTable_t *htable, int key, int val);
+void         HTable_Delete(gk_HTable_t *htable, int key);
+int          HTable_Search(gk_HTable_t *htable, int key);
+int          HTable_GetNext(gk_HTable_t *htable, int key, int *val, int type);
+int          HTable_SearchAndDelete(gk_HTable_t *htable, int key);
+void         HTable_Destroy(gk_HTable_t *htable);
+int          HTable_HFunction(int nelements, int key);
+ 
+
+/*-------------------------------------------------------------
+ * Tokenizer routines
+ *-------------------------------------------------------------*/
+void gk_strtokenize(char *line, char *delim, gk_Tokens_t *tokens);
+void gk_freetokenslist(gk_Tokens_t *tokens);
+
+/*-------------------------------------------------------------
+ * Encoder/Decoder
+ *-------------------------------------------------------------*/
+void encodeblock(unsigned char *in, unsigned char *out);
+void decodeblock(unsigned char *in, unsigned char *out);
+void GKEncodeBase64(int nbytes, unsigned char *inbuffer, unsigned char *outbuffer);
+void GKDecodeBase64(int nbytes, unsigned char *inbuffer, unsigned char *outbuffer);
+
+
+/*-------------------------------------------------------------
+ * random.c
+ *-------------------------------------------------------------*/
+GK_MKRANDOM_PROTO(gk_c,   size_t, char)
+GK_MKRANDOM_PROTO(gk_i,   size_t, int)
+GK_MKRANDOM_PROTO(gk_i32, size_t, int32_t)
+GK_MKRANDOM_PROTO(gk_f,   size_t, float)
+GK_MKRANDOM_PROTO(gk_d,   size_t, double)
+GK_MKRANDOM_PROTO(gk_idx, size_t, gk_idx_t)
+GK_MKRANDOM_PROTO(gk_z,   size_t, ssize_t)
+GK_MKRANDOM_PROTO(gk_zu,  size_t, size_t)
+void gk_randinit(uint64_t);
+uint64_t gk_randint64(void);
+uint32_t gk_randint32(void);
+
+
+/*-------------------------------------------------------------
+ * OpenMP fake functions
+ *-------------------------------------------------------------*/
+#if !defined(__OPENMP__) /* NOTE(review): not the compiler's _OPENMP; presumably set by the build system - confirm */
+void omp_set_num_threads(int num_threads);
+int omp_get_num_threads(void);
+int omp_get_max_threads(void);
+int omp_get_thread_num(void);
+int omp_get_num_procs(void);
+int omp_in_parallel(void);
+void omp_set_dynamic(int dynamic_threads);
+int omp_get_dynamic(void);
+void omp_set_nested(int nested);
+int omp_get_nested(void);
+#endif /* __OPENMP__ */
+
+
+/*-------------------------------------------------------------
+ * CSR-related functions
+ *-------------------------------------------------------------*/
+gk_csr_t *gk_csr_Create(void);      /* (void): a real prototype, so stray arguments are diagnosed in C */
+void gk_csr_Init(gk_csr_t *mat);
+void gk_csr_Free(gk_csr_t **mat);
+void gk_csr_FreeContents(gk_csr_t *mat);
+gk_csr_t *gk_csr_Dup(gk_csr_t *mat);
+gk_csr_t *gk_csr_ExtractSubmatrix(gk_csr_t *mat, int rstart, int nrows);
+gk_csr_t *gk_csr_ExtractRows(gk_csr_t *mat, int nrows, int *rind);
+gk_csr_t *gk_csr_ExtractPartition(gk_csr_t *mat, int *part, int pid);
+gk_csr_t **gk_csr_Split(gk_csr_t *mat, int *color);
+int gk_csr_DetermineFormat(char *filename, int format);
+gk_csr_t *gk_csr_Read(char *filename, int format, int readvals, int numbering);
+void gk_csr_Write(gk_csr_t *mat, char *filename, int format, int writevals, int numbering);
+gk_csr_t *gk_csr_Prune(gk_csr_t *mat, int what, int minf, int maxf);
+gk_csr_t *gk_csr_LowFilter(gk_csr_t *mat, int what, int norm, float fraction);
+gk_csr_t *gk_csr_TopKPlusFilter(gk_csr_t *mat, int what, int topk, float keepval);
+gk_csr_t *gk_csr_ZScoreFilter(gk_csr_t *mat, int what, float zscore);
+void gk_csr_CompactColumns(gk_csr_t *mat);
+void gk_csr_SortIndices(gk_csr_t *mat, int what);
+void gk_csr_CreateIndex(gk_csr_t *mat, int what);
+void gk_csr_Normalize(gk_csr_t *mat, int what, int norm);
+void gk_csr_Scale(gk_csr_t *mat, int type);
+void gk_csr_ComputeSums(gk_csr_t *mat, int what);
+void gk_csr_ComputeNorms(gk_csr_t *mat, int what);
+void gk_csr_ComputeSquaredNorms(gk_csr_t *mat, int what);
+gk_csr_t *gk_csr_Shuffle(gk_csr_t *mat, int what, int symmetric);
+gk_csr_t *gk_csr_Transpose(gk_csr_t *mat);
+float gk_csr_ComputeSimilarity(gk_csr_t *mat, int i1, int i2, int what, int simtype);
+float gk_csr_ComputePairSimilarity(gk_csr_t *mat_a, gk_csr_t *mat_b, int i1, int i2, int what, int simtype);
+int gk_csr_GetSimilarRows(gk_csr_t *mat, int nqterms, int *qind, float *qval,
+        int simtype, int nsim, float minsim, gk_fkv_t *hits, int *_imarker,
+        gk_fkv_t *i_cand);
+int gk_csr_FindConnectedComponents(gk_csr_t *mat, int32_t *cptr, int32_t *cind,
+        int32_t *cids);
+gk_csr_t *gk_csr_MakeSymmetric(gk_csr_t *mat, int op);
+gk_csr_t *gk_csr_ReorderSymmetric(gk_csr_t *mat, int32_t *perm, int32_t *iperm);
+void gk_csr_ComputeBFSOrderingSymmetric(gk_csr_t *mat, int maxdegree, int v,
+          int32_t **r_perm, int32_t **r_iperm);
+void gk_csr_ComputeBestFOrderingSymmetric(gk_csr_t *mat, int v, int type,
+          int32_t **r_perm, int32_t **r_iperm);
+
+
+/* itemsets.c */
+void gk_find_frequent_itemsets(int ntrans, ssize_t *tranptr, int *tranind,
+        int minfreq, int maxfreq, int minlen, int maxlen,
+        void (*process_itemset)(void *stateptr, int nitems, int *itemind,
+                                int ntrans, int *tranind),
+        void *stateptr);
+
+
+/* evaluate.c */
+float ComputeAccuracy(int n, gk_fkv_t *list);
+float ComputeROCn(int n, int maxN, gk_fkv_t *list);
+float ComputeMedianRFP(int n, gk_fkv_t *list);
+float ComputeMean (int n, float *values);
+float ComputeStdDev(int  n, float *values);
+
+
+/* mcore.c */
+gk_mcore_t *gk_mcoreCreate(size_t coresize);
+gk_mcore_t *gk_gkmcoreCreate(void); /* (void): a real prototype, so stray arguments are diagnosed in C */
+void gk_mcoreDestroy(gk_mcore_t **r_mcore, int showstats);
+void gk_gkmcoreDestroy(gk_mcore_t **r_mcore, int showstats);
+void *gk_mcoreMalloc(gk_mcore_t *mcore, size_t nbytes);
+void gk_mcorePush(gk_mcore_t *mcore);
+void gk_gkmcorePush(gk_mcore_t *mcore);
+void gk_mcorePop(gk_mcore_t *mcore);
+void gk_gkmcorePop(gk_mcore_t *mcore);
+void gk_mcoreAdd(gk_mcore_t *mcore, int type, size_t nbytes, void *ptr);
+void gk_gkmcoreAdd(gk_mcore_t *mcore, int type, size_t nbytes, void *ptr);
+void gk_mcoreDel(gk_mcore_t *mcore, void *ptr);
+void gk_gkmcoreDel(gk_mcore_t *mcore, void *ptr);
+
+/* rw.c */
+int gk_rw_PageRank(gk_csr_t *mat, float lamda, float eps, int max_niter, float *pr);
+
+
+/* graph.c */
+gk_graph_t *gk_graph_Create(void);  /* (void): a real prototype, so stray arguments are diagnosed in C */
+void gk_graph_Init(gk_graph_t *graph);
+void gk_graph_Free(gk_graph_t **graph);
+void gk_graph_FreeContents(gk_graph_t *graph);
+gk_graph_t *gk_graph_Read(char *filename, int format, int hasvals,
+                 int numbering, int isfewgts, int isfvwgts, int isfvsizes);
+void gk_graph_Write(gk_graph_t *graph, char *filename, int format, int numbering);
+gk_graph_t *gk_graph_Dup(gk_graph_t *graph);
+gk_graph_t *gk_graph_Transpose(gk_graph_t *graph);
+gk_graph_t *gk_graph_ExtractSubgraph(gk_graph_t *graph, int vstart, int nvtxs);
+gk_graph_t *gk_graph_Reorder(gk_graph_t *graph, int32_t *perm, int32_t *iperm);
+int gk_graph_FindComponents(gk_graph_t *graph, int32_t *cptr, int32_t *cind);
+void gk_graph_ComputeBFSOrdering(gk_graph_t *graph, int v, int32_t **r_perm,
+         int32_t **r_iperm);
+void gk_graph_ComputeBestFOrdering0(gk_graph_t *graph, int v, int type,
+              int32_t **r_perm, int32_t **r_iperm);
+void gk_graph_ComputeBestFOrdering(gk_graph_t *graph, int v, int type,
+              int32_t **r_perm, int32_t **r_iperm);
+void gk_graph_SingleSourceShortestPaths(gk_graph_t *graph, int v, void **r_sps);
+void gk_graph_SortAdjacencies(gk_graph_t *graph);
+gk_graph_t *gk_graph_MakeSymmetric(gk_graph_t *graph, int op);
+
+
+/* cache.c */
+gk_cache_t *gk_cacheCreate(uint32_t nway, uint32_t lnbits, size_t cnbits);
+void gk_cacheReset(gk_cache_t *cache);
+void gk_cacheDestroy(gk_cache_t **r_cache);
+int gk_cacheLoad(gk_cache_t *cache, size_t addr);
+double gk_cacheGetHitRate(gk_cache_t *cache);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif
+
diff --git a/gk_struct.h b/gk_struct.h
new file mode 100644
index 0000000..2925e98
--- /dev/null
+++ b/gk_struct.h
@@ -0,0 +1,296 @@
+/*!
+\file gk_struct.h
+\brief This file contains various datastructures used/provided by GKlib
+
+\date   Started 3/27/2007
+\author George
+\version\verbatim $Id: gk_struct.h 21988 2018-04-16 00:11:19Z karypis $ \endverbatim
+*/
+
+#ifndef _GK_STRUCT_H_
+#define _GK_STRUCT_H_
+
+
+/********************************************************************/
+/*! Generator for gk_??KeyVal_t data structure */
+/********************************************************************/
+#define GK_MKKEYVALUE_T(NAME, KEYTYPE, VALTYPE) \
+typedef struct {\
+  KEYTYPE key;\
+  VALTYPE val;\
+} NAME;\
+
+/* The actual KeyVal data structures */
+GK_MKKEYVALUE_T(gk_ckv_t,   char,     ssize_t)
+GK_MKKEYVALUE_T(gk_ikv_t,   int,      ssize_t)
+GK_MKKEYVALUE_T(gk_i8kv_t,  int8_t,   ssize_t)
+GK_MKKEYVALUE_T(gk_i16kv_t, int16_t,  ssize_t)
+GK_MKKEYVALUE_T(gk_i32kv_t, int32_t,  ssize_t)
+GK_MKKEYVALUE_T(gk_i64kv_t, int64_t,  ssize_t)
+GK_MKKEYVALUE_T(gk_zkv_t,   ssize_t,  ssize_t)
+GK_MKKEYVALUE_T(gk_zukv_t,  size_t,   ssize_t)
+GK_MKKEYVALUE_T(gk_fkv_t,   float,    ssize_t)
+GK_MKKEYVALUE_T(gk_dkv_t,   double,   ssize_t)
+GK_MKKEYVALUE_T(gk_skv_t,   char *,   ssize_t)
+GK_MKKEYVALUE_T(gk_idxkv_t, gk_idx_t, gk_idx_t)
+
+
+
+/********************************************************************/
+/*! Generator for gk_?pq_t data structure */
+/********************************************************************/
+#define GK_MKPQUEUE_T(NAME, KVTYPE)\
+typedef struct {\
+  size_t nnodes;\
+  size_t maxnodes;\
+\
+  /* Heap version of the data structure */ \
+  KVTYPE   *heap;\
+  ssize_t *locator;\
+} NAME;\
+
+GK_MKPQUEUE_T(gk_ipq_t,    gk_ikv_t)
+GK_MKPQUEUE_T(gk_i32pq_t,  gk_i32kv_t)
+GK_MKPQUEUE_T(gk_i64pq_t,  gk_i64kv_t)
+GK_MKPQUEUE_T(gk_fpq_t,    gk_fkv_t)
+GK_MKPQUEUE_T(gk_dpq_t,    gk_dkv_t)
+GK_MKPQUEUE_T(gk_idxpq_t,  gk_idxkv_t)
+
+
+#define GK_MKPQUEUE2_T(NAME, KTYPE, VTYPE)\
+typedef struct {\
+  ssize_t nnodes;\
+  ssize_t maxnodes;\
+\
+  /* Heap version of the data structure */ \
+  KTYPE *keys;\
+  VTYPE *vals;\
+} NAME;\
+
+
+
+/*-------------------------------------------------------------
+ * The following data structure stores a sparse CSR format
+ *-------------------------------------------------------------*/
+typedef struct gk_csr_t {
+  int32_t nrows, ncols;
+  ssize_t *rowptr, *colptr;
+  int32_t *rowind, *colind;
+  int32_t *rowids, *colids;
+  int32_t *rlabels, *clabels;
+  int32_t *rmap, *cmap;
+  float *rowval, *colval;
+  float *rnorms, *cnorms;
+  float *rsums, *csums;
+  float *rsizes, *csizes;
+  float *rvols, *cvols;
+  float *rwgts, *cwgts;
+} gk_csr_t;
+
+
+/*-------------------------------------------------------------
+ * The following data structure stores a sparse graph 
+ *-------------------------------------------------------------*/
+typedef struct gk_graph_t {
+  int32_t nvtxs;                /*!< The number of vertices in the graph */
+  ssize_t *xadj;                /*!< The ptr-structure of the adjncy list */
+  int32_t *adjncy;              /*!< The adjacency list of the graph */
+  int32_t *iadjwgt;             /*!< The integer edge weights */
+  float *fadjwgt;               /*!< The floating point edge weights */
+  int32_t *ivwgts;              /*!< The integer vertex weights */
+  float *fvwgts;                /*!< The floating point vertex weights */
+  int32_t *ivsizes;             /*!< The integer vertex sizes */
+  float *fvsizes;               /*!< The floating point vertex sizes */
+  int32_t *vlabels;             /*!< The labels of the vertices */
+} gk_graph_t;
+
+
+/*-------------------------------------------------------------
+ * The following data structure stores a string as a
+ * pair of its allocated buffer length (len) and the buffer itself (buf).
+ *-------------------------------------------------------------*/
+typedef struct gk_str_t {
+  size_t len;
+  char *buf;
+} gk_str_t;
+
+
+
+
+/*-------------------------------------------------------------
+* The following data structure implements a string-2-int mapping
+* table used for parsing command-line options
+*-------------------------------------------------------------*/
+typedef struct gk_StringMap_t {
+  char *name;
+  int id;
+} gk_StringMap_t;
+
+
+/*------------------------------------------------------------
+ * This structure implements a simple hash table
+ *------------------------------------------------------------*/
+typedef struct gk_HTable_t {
+  int nelements;          /* The overall size of the hash-table */
+  int htsize;             /* The current size of the hash-table */
+  gk_ikv_t *harray;       /* The actual hash-table */
+} gk_HTable_t;
+
+
+/*------------------------------------------------------------
+ * This structure implements a gk_Tokens_t list returned by the
+ * string tokenizer
+ *------------------------------------------------------------*/
+typedef struct gk_Tokens_t {
+  int ntoks;        /* The number of tokens in the input string */
+  char *strbuf;     /* The memory that stores all the entries */
+  char **list;      /* Pointers to the strbuf for each element */
+} gk_Tokens_t;
+
+
+/*------------------------------------------------------------
+ * This structure implements storage for an atom in a pdb file
+ *------------------------------------------------------------*/
+typedef struct atom {
+  int       serial;
+  char      *name;
+  char	    altLoc;
+  char      *resname;
+  char      chainid;	
+  int       rserial;
+  char	    icode;
+  char      element;
+  double    x;
+  double    y;
+  double    z;
+  double    opcy;
+  double    tmpt;
+} atom;
+
+
+/*------------------------------------------------------------
+ * This structure implements storage for a center of mass for
+ * a single residue.
+ *------------------------------------------------------------*/
+typedef struct center_of_mass {
+  char name;
+  double x;
+  double y;
+  double z;
+} center_of_mass;
+
+
+/*------------------------------------------------------------
+ * This structure implements storage for a pdb protein 
+ *------------------------------------------------------------*/
+typedef struct pdbf {
+	int natoms;			/* Number of atoms */
+	int nresidues;  /* Number of residues based on coordinates */
+	int ncas;
+	int nbbs;
+	int corruption;
+	char *resSeq;	      /* Residue sequence based on coordinates    */
+  char **threeresSeq; /* three-letter residue sequence */
+	atom *atoms;
+	atom **bbs;
+	atom **cas;
+  center_of_mass *cm;
+} pdbf;
+
+
+
+/*************************************************************
+* Localization Structures for converting characters to integers
+**************************************************************/
+typedef struct gk_i2cc2i_t {
+    int n;
+    char *i2c;
+    int *c2i;
+} gk_i2cc2i_t;
+ 
+
+/*******************************************************************
+ *This structure implements storage of a protein sequence
+ * *****************************************************************/
+typedef struct gk_seq_t {
+    
+    int len; /*Number of Residues */
+    int *sequence; /* Stores the sequence*/
+    
+    
+    int **pssm; /* Stores the pssm matrix */
+    int **psfm; /* Stores the psfm matrix */
+    char *name; /* Stores the name of the sequence */
+
+    int nsymbols;
+
+    
+} gk_seq_t;
+
+
+
+
+/*************************************************************************/
+/*! The following data structure stores information about a memory 
+    allocation operation that can either be served from gk_mcore_t or by
+    a gk_malloc if not sufficient workspace memory is available. */
+/*************************************************************************/
+typedef struct gk_mop_t {
+  int type;
+  ssize_t nbytes;
+  void *ptr;
+} gk_mop_t;
+
+
+/*************************************************************************/
+/*! The following structure defines the mcore for GKlib's customized
+    memory allocations. */
+/*************************************************************************/
+typedef struct gk_mcore_t {
+  /* Workspace information */
+  size_t coresize;     /*!< The amount of core memory that has been allocated */
+  size_t corecpos;     /*!< Index of the first free location in core */
+  void *core;	       /*!< Pointer to the core itself */
+
+  /* These are for implementing a stack-based allocation scheme using both
+     core and also dynamically allocated memory */
+  size_t nmops;         /*!< The number of gk_mop_t entries that have been allocated */
+  size_t cmop;          /*!< Index of the first free location in mops */
+  gk_mop_t *mops;       /*!< The array recording the gk_mop_t operations */
+
+  /* These are for keeping various statistics for wspacemalloc */
+  size_t num_callocs;   /*!< The number of core mallocs */
+  size_t num_hallocs;   /*!< The number of heap mallocs */
+  size_t size_callocs;  /*!< The total # of bytes in core mallocs */
+  size_t size_hallocs;  /*!< The total # of bytes in heap mallocs */
+  size_t cur_callocs;   /*!< The current # of bytes in core mallocs */
+  size_t cur_hallocs;   /*!< The current # of bytes in heap mallocs */
+  size_t max_callocs;   /*!< The maximum # of bytes in core mallocs at any given time */
+  size_t max_hallocs;   /*!< The maximum # of bytes in heap mallocs at any given time */
+
+} gk_mcore_t;
+
+
+/*************************************************************************/
+/*! The following structure is used for cache simulation for performance
+    modeling and analysis. */
+/*************************************************************************/
+typedef struct gk_cache_t {
+  /*! The total cache is nway*(2^(cnbits+lnbits)) bytes */
+  uint32_t nway;        /*!< the associativity of the cache */
+  uint32_t lnbits;      /*!< the number of address bits indexing the cache line */
+  uint32_t cnbits;      /*!< the number of address bits indexing the cache */
+  size_t csize;         /*!< 2^cnbits */
+  size_t cmask;         /*!< csize-1 */
+
+  uint64_t clock;       /*!< a clock in terms of accesses */
+  
+  uint64_t *latimes;    /*!< a cacheline-level last access time */
+  size_t *clines;       /*!< the cache in terms of cachelines */
+
+  uint64_t nhits;       /*!< counts the number of hits */
+  uint64_t nmisses;     /*!< counts the number of misses */
+} gk_cache_t;
+
+
+#endif
diff --git a/gk_types.h b/gk_types.h
new file mode 100644
index 0000000..57c1191
--- /dev/null
+++ b/gk_types.h
@@ -0,0 +1,38 @@
+/*!
+\file  gk_types.h
+\brief This file contains basic scalar datatype used in GKlib
+
+\date   Started 3/27/2007
+\author George
+\version\verbatim $Id: gk_types.h 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
+*/
+
+#ifndef _GK_TYPES_H_
+#define _GK_TYPES_H_
+
+/*************************************************************************
+* Basic data type definitions. These definitions allow GKlib to separate
+* the following elemental types:
+* - index (loop iterator) variables, which are set to the signed ssize_t
+* - signed and unsigned int variables that can be set to any # of bits
+* - signed and unsigned long variables that can be set to any # of bits
+* - real variables, which can be set to single or double precision.
+**************************************************************************/
+/*typedef ptrdiff_t       gk_idx_t;       */  /* index variable */
+typedef ssize_t         gk_idx_t;         /* index variable */
+typedef int32_t         gk_int_t;         /* integer values */
+typedef uint32_t        gk_uint_t;        /* unsigned integer values */
+typedef int64_t         gk_long_t;        /* long integer values */
+typedef uint64_t        gk_ulong_t;       /* unsigned long integer values */
+typedef float           gk_real_t;        /* real type */
+typedef double          gk_dreal_t;       /* double precision real type */
+typedef double          gk_wclock_t;	  /* wall-clock time */
+
+/*#define GK_IDX_MAX PTRDIFF_MAX*/
+#define GK_IDX_MAX ((SIZE_MAX>>1)-2)  /* two below SIZE_MAX/2 */
+
+#define PRIGKIDX "zd"  /* presumably the printf conversion for gk_idx_t */
+#define SCNGKIDX "zd"  /* presumably the scanf conversion for gk_idx_t */
+
+
+#endif
diff --git a/gk_util.c b/gk_util.c
new file mode 100644
index 0000000..e1e68db
--- /dev/null
+++ b/gk_util.c
@@ -0,0 +1,107 @@
+/*!
+\file  gk_util.c
+\brief Various utility routines
+
+\date   Started 4/12/2007
+\author George
+\version\verbatim $Id: gk_util.c 16223 2014-02-15 21:34:09Z karypis $ \endverbatim
+*/
+
+
+#include <GKlib.h>
+
+
+/*************************************************************************
+* This function shuffles an array in place by performing n/2 swaps of
+* randomly selected pairs of entries.
+* flag == 1: first set p[i] = i; any other value: use p's contents as is
+**************************************************************************/
+void gk_RandomPermute(size_t n, int *p, int flag)
+{
+  size_t k, src, dst, nswaps = n/2;
+  int tmp;
+
+  if (flag == 1) {
+    for (k=0; k<n; k++)
+      p[k] = k;
+  }
+
+  for (k=0; k<nswaps; k++) {
+    dst = RandomInRange(n);
+    src = RandomInRange(n);
+    gk_SWAP(p[dst], p[src], tmp);
+  }
+}
+
+
+/************************************************************************/
+/*!
+\brief Converts an element-based set membership into a CSR-format set-based
+       membership.
+
+For example, it takes an array such as part[] that stores where each
+element belongs to and returns a pair of arrays (pptr[], pind[]) that
+store in CSR format the list of elements belonging in each partition.
+
+\param n
+  the number of elements in the array (e.g., # of vertices)
+\param range
+  the cardinality of the set (e.g., # of partitions)
+\param array
+  the per-element set membership; each entry is used as an index into ptr
+\param ptr
+  the array that will store the starting indices in ind for
+  the elements of each set. This is filled by the routine and
+  its size should be at least range+1.
+\param ind
+  the array that stores consecutively which elements belong to
+  each set. The size of this array should be n.
+*/
+/************************************************************************/
+void gk_array2csr(size_t n, size_t range, int *array, int *ptr, int *ind)
+{
+  size_t i;
+
+  gk_iset(range+1, 0, ptr);  /* presumably sets ptr[0..range] to 0 -- confirm */
+
+  for (i=0; i<n; i++) 
+    ptr[array[i]]++;  /* count the members of each set */
+
+  /* Compute the ptr, ind structure */
+  MAKECSR(i, range, ptr);  /* presumably turns the counts into start offsets -- confirm */
+  for (i=0; i<n; i++)
+    ind[ptr[array[i]]++] = i;  /* store element i and advance its set's cursor */
+  SHIFTCSR(i, range, ptr);  /* presumably undoes the cursor advances above -- confirm */
+}
+
+
+/*************************************************************************
+* This function returns floor(log2(a)) for a >= 1, and 0 for a <= 0
+**************************************************************************/
+int gk_log2(int a)
+{
+  int nshifts = 0;
+
+  while (a > 1) { a >>= 1; nshifts++; }
+  return nshifts;
+}
+
+
+/*************************************************************************
+* This function returns 1 if a is a positive power of 2 and 0 otherwise
+**************************************************************************/
+int gk_ispow2(int a)
+{
+  return (a > 0 && (a & (a-1)) == 0);
+}
+
+
+/*************************************************************************
+* This function returns log2(a), evaluated as ln(a)/ln(2) in double precision
+**************************************************************************/
+float gk_flog2(float a)
+{
+  return log((double)a)/log(2.0);
+}
+
+
diff --git a/gkregex.c b/gkregex.c
new file mode 100644
index 0000000..8a09caa
--- /dev/null
+++ b/gkregex.c
@@ -0,0 +1,10704 @@
+/* Extended regular expression matching and search library.
+   Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+/* dummy definition that removes a compiler warning (presumably about an empty translation unit when USE_GKREGEX is undefined) */
+void gkfooo() { return; }
+
+#ifdef USE_GKREGEX
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef _LIBC
+/* We have to keep the namespace clean.  */
+# define regfree(preg) __regfree (preg)
+# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
+# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
+# define regerror(errcode, preg, errbuf, errbuf_size) \
+	__regerror(errcode, preg, errbuf, errbuf_size)
+# define re_set_registers(bu, re, nu, st, en) \
+	__re_set_registers (bu, re, nu, st, en)
+# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
+	__re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
+# define re_match(bufp, string, size, pos, regs) \
+	__re_match (bufp, string, size, pos, regs)
+# define re_search(bufp, string, size, startpos, range, regs) \
+	__re_search (bufp, string, size, startpos, range, regs)
+# define re_compile_pattern(pattern, length, bufp) \
+	__re_compile_pattern (pattern, length, bufp)
+# define re_set_syntax(syntax) __re_set_syntax (syntax)
+# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
+	__re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
+# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
+
+# include "../locale/localeinfo.h"
+#endif
+
+#include "GKlib.h"
+
+
+/******************************************************************************/
+/******************************************************************************/
+/******************************************************************************/
+/* GKINCLUDE #include "regex_internal.h" */
+/******************************************************************************/
+/******************************************************************************/
+/******************************************************************************/
+/* Extended regular expression matching and search library.
+   Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#ifndef _REGEX_INTERNAL_H
+#define _REGEX_INTERNAL_H 1
+
+#include <assert.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if defined(__MINGW32_VERSION) || defined(_MSC_VER)
+#define strcasecmp stricmp
+#endif
+
+#if defined HAVE_LANGINFO_H || defined HAVE_LANGINFO_CODESET || defined _LIBC
+# include <langinfo.h>
+#endif
+#if defined HAVE_LOCALE_H || defined _LIBC
+# include <locale.h>
+#endif
+#if defined HAVE_WCHAR_H || defined _LIBC
+# include <wchar.h>
+#endif /* HAVE_WCHAR_H || _LIBC */
+#if defined HAVE_WCTYPE_H || defined _LIBC
+# include <wctype.h>
+#endif /* HAVE_WCTYPE_H || _LIBC */
+#if defined HAVE_STDBOOL_H || defined _LIBC
+# include <stdbool.h>
+#else
+typedef enum { false, true } bool;
+#endif /* HAVE_STDBOOL_H || _LIBC */
+#if defined HAVE_STDINT_H || defined _LIBC
+# include <stdint.h>
+#endif /* HAVE_STDINT_H || _LIBC */
+#if defined _LIBC
+# include <bits/libc-lock.h>
+#else
+# define __libc_lock_define(CLASS,NAME)
+# define __libc_lock_init(NAME) do { } while (0)
+# define __libc_lock_lock(NAME) do { } while (0)
+# define __libc_lock_unlock(NAME) do { } while (0)
+#endif
+
+/* In case that the system doesn't have isblank().  */
+#if !defined _LIBC && !defined HAVE_ISBLANK && !defined isblank
+# define isblank(ch) ((ch) == ' ' || (ch) == '\t')
+#endif
+
+#ifdef _LIBC
+# ifndef _RE_DEFINE_LOCALE_FUNCTIONS
+#  define _RE_DEFINE_LOCALE_FUNCTIONS 1
+#   include <locale/localeinfo.h>
+#   include <locale/elem-hash.h>
+#   include <locale/coll-lookup.h>
+# endif
+#endif
+
+/* This is for other GNU distributions with internationalized messages.  */
+#if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC
+# include <libintl.h>
+# ifdef _LIBC
+#  undef gettext
+#  define gettext(msgid) \
+  INTUSE(__dcgettext) (_libc_intl_domainname, msgid, LC_MESSAGES)
+# endif
+#else
+# define gettext(msgid) (msgid)
+#endif
+
+#ifndef gettext_noop
+/* This define is so xgettext can find the internationalizable
+   strings.  */
+# define gettext_noop(String) String
+#endif
+
+/* For loser systems without the definition.  */
+#ifndef SIZE_MAX
+# define SIZE_MAX ((size_t) -1)
+#endif
+
+#if (defined MB_CUR_MAX && HAVE_LOCALE_H && HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_WCRTOMB && HAVE_MBRTOWC && HAVE_WCSCOLL) || _LIBC
+# define RE_ENABLE_I18N
+#endif
+
+#if __GNUC__ >= 3
+# define BE(expr, val) __builtin_expect (expr, val)
+#else
+# define BE(expr, val) (expr)
+# define inline
+#endif
+
+/* Number of single byte character.  */
+#define SBC_MAX 256
+
+#define COLL_ELEM_LEN_MAX 8
+
+/* The character which represents newline.  */
+#define NEWLINE_CHAR '\n'
+#define WIDE_NEWLINE_CHAR L'\n'
+
+/* Rename to standard API for using out of glibc.  */
+#ifndef _LIBC
+# define __wctype wctype
+# define __iswctype iswctype
+# define __btowc btowc
+# define __mempcpy mempcpy
+# define __wcrtomb wcrtomb
+# define __regfree regfree
+# define attribute_hidden
+#endif /* not _LIBC */
+
+#ifdef __GNUC__
+# define __attribute(arg) __attribute__ (arg)
+#else
+# define __attribute(arg)
+#endif
+
+extern const char __re_error_msgid[] attribute_hidden;
+extern const size_t __re_error_msgid_idx[] attribute_hidden;
+
+/* An integer used to represent a set of bits.  It must be unsigned,
+   and must be at least as wide as unsigned int.  */
+typedef unsigned long int bitset_word_t;
+/* All bits set in a bitset_word_t.  */
+#define BITSET_WORD_MAX ULONG_MAX
+/* Number of bits in a bitset_word_t.  */
+#define BITSET_WORD_BITS (sizeof (bitset_word_t) * CHAR_BIT)
+/* Number of bitset_word_t in a bit_set.  */
+#define BITSET_WORDS (SBC_MAX / BITSET_WORD_BITS)
+typedef bitset_word_t bitset_t[BITSET_WORDS];
+typedef bitset_word_t *re_bitset_ptr_t;
+typedef const bitset_word_t *re_const_bitset_ptr_t;
+
+#define bitset_set(set,i) /* Turn on bit I of SET.  */ \
+  ((set)[(i) / BITSET_WORD_BITS] |= (bitset_word_t) 1 << (i) % BITSET_WORD_BITS)
+#define bitset_clear(set,i) /* Turn off bit I of SET.  */ \
+  ((set)[(i) / BITSET_WORD_BITS] &= ~((bitset_word_t) 1 << (i) % BITSET_WORD_BITS))
+#define bitset_contain(set,i) /* Nonzero iff bit I of SET is on.  */ \
+  ((set)[(i) / BITSET_WORD_BITS] & ((bitset_word_t) 1 << (i) % BITSET_WORD_BITS))
+#define bitset_empty(set) memset (set, '\0', sizeof (bitset_t)) /* all bits off */
+#define bitset_set_all(set) memset (set, '\xff', sizeof (bitset_t)) /* all bits on */
+#define bitset_copy(dest,src) memcpy (dest, src, sizeof (bitset_t)) /* DEST = SRC */
+
+#define PREV_WORD_CONSTRAINT 0x0001
+#define PREV_NOTWORD_CONSTRAINT 0x0002
+#define NEXT_WORD_CONSTRAINT 0x0004
+#define NEXT_NOTWORD_CONSTRAINT 0x0008
+#define PREV_NEWLINE_CONSTRAINT 0x0010
+#define NEXT_NEWLINE_CONSTRAINT 0x0020
+#define PREV_BEGBUF_CONSTRAINT 0x0040
+#define NEXT_ENDBUF_CONSTRAINT 0x0080
+#define WORD_DELIM_CONSTRAINT 0x0100
+#define NOT_WORD_DELIM_CONSTRAINT 0x0200
+
+typedef enum
+{
+  INSIDE_WORD = PREV_WORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
+  WORD_FIRST = PREV_NOTWORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
+  WORD_LAST = PREV_WORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
+  INSIDE_NOTWORD = PREV_NOTWORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
+  LINE_FIRST = PREV_NEWLINE_CONSTRAINT,
+  LINE_LAST = NEXT_NEWLINE_CONSTRAINT,
+  BUF_FIRST = PREV_BEGBUF_CONSTRAINT,
+  BUF_LAST = NEXT_ENDBUF_CONSTRAINT,
+  WORD_DELIM = WORD_DELIM_CONSTRAINT,
+  NOT_WORD_DELIM = NOT_WORD_DELIM_CONSTRAINT
+} re_context_type;
+
+typedef struct
+{
+  int alloc;	/* presumably the allocated capacity of ELEMS -- confirm */
+  int nelem;	/* element count; re_node_set_empty resets it to 0 */
+  int *elems;	/* element buffer; released by re_node_set_free */
+} re_node_set;
+
+typedef enum
+{
+  NON_TYPE = 0,
+
+  /* Node type, These are used by token, node, tree.  */
+  CHARACTER = 1,
+  END_OF_RE = 2,
+  SIMPLE_BRACKET = 3,
+  OP_BACK_REF = 4,
+  OP_PERIOD = 5,
+#ifdef RE_ENABLE_I18N
+  COMPLEX_BRACKET = 6,
+  OP_UTF8_PERIOD = 7,
+#endif /* RE_ENABLE_I18N */
+
+  /* We define EPSILON_BIT as a macro so that OP_OPEN_SUBEXP is used
+     when the debugger shows values of this enum type.  */
+#define EPSILON_BIT 8
+  OP_OPEN_SUBEXP = EPSILON_BIT | 0,
+  OP_CLOSE_SUBEXP = EPSILON_BIT | 1,
+  OP_ALT = EPSILON_BIT | 2,
+  OP_DUP_ASTERISK = EPSILON_BIT | 3,
+  ANCHOR = EPSILON_BIT | 4,
+
+  /* Tree type, these are used only by tree. */
+  CONCAT = 16,
+  SUBEXP = 17,
+
+  /* Token type, these are used only by token.  */
+  OP_DUP_PLUS = 18,
+  OP_DUP_QUESTION,
+  OP_OPEN_BRACKET,
+  OP_CLOSE_BRACKET,
+  OP_CHARSET_RANGE,
+  OP_OPEN_DUP_NUM,
+  OP_CLOSE_DUP_NUM,
+  OP_NON_MATCH_LIST,
+  OP_OPEN_COLL_ELEM,
+  OP_CLOSE_COLL_ELEM,
+  OP_OPEN_EQUIV_CLASS,
+  OP_CLOSE_EQUIV_CLASS,
+  OP_OPEN_CHAR_CLASS,
+  OP_CLOSE_CHAR_CLASS,
+  OP_WORD,
+  OP_NOTWORD,
+  OP_SPACE,
+  OP_NOTSPACE,
+  BACK_SLASH
+
+} re_token_type_t;
+
+#ifdef RE_ENABLE_I18N
+typedef struct
+{
+  /* Multibyte characters.  */
+  wchar_t *mbchars;
+
+  /* Collating symbols.  */
+# ifdef _LIBC
+  int32_t *coll_syms;
+# endif
+
+  /* Equivalence classes. */
+# ifdef _LIBC
+  int32_t *equiv_classes;
+# endif
+
+  /* Range expressions. */
+# ifdef _LIBC
+  uint32_t *range_starts;
+  uint32_t *range_ends;
+# else /* not _LIBC */
+  wchar_t *range_starts;
+  wchar_t *range_ends;
+# endif /* not _LIBC */
+
+  /* Character classes. */
+  wctype_t *char_classes;
+
+  /* If this character set is the non-matching list.  */
+  unsigned int non_match : 1;
+
+  /* # of multibyte characters.  */
+  int nmbchars;
+
+  /* # of collating symbols.  */
+  int ncoll_syms;
+
+  /* # of equivalence classes. */
+  int nequiv_classes;
+
+  /* # of range expressions. */
+  int nranges;
+
+  /* # of character classes. */
+  int nchar_classes;
+} re_charset_t;
+#endif /* RE_ENABLE_I18N */
+
+typedef struct
+{
+  union
+  {
+    unsigned char c;		/* for CHARACTER */
+    re_bitset_ptr_t sbcset;	/* for SIMPLE_BRACKET */
+#ifdef RE_ENABLE_I18N
+    re_charset_t *mbcset;	/* for COMPLEX_BRACKET */
+#endif /* RE_ENABLE_I18N */
+    int idx;			/* for BACK_REF */
+    re_context_type ctx_type;	/* for ANCHOR */
+  } opr;
+#if __GNUC__ >= 2
+  re_token_type_t type : 8;
+#else
+  re_token_type_t type;
+#endif
+  unsigned int constraint : 10;	/* context constraint */
+  unsigned int duplicated : 1;
+  unsigned int opt_subexp : 1;
+#ifdef RE_ENABLE_I18N
+  unsigned int accept_mb : 1;
+  /* These 2 bits can be moved into the union if needed (e.g. if running out
+     of bits; move opr.c to opr.c.c and move the flags to opr.c.flags).  */
+  unsigned int mb_partial : 1;
+#endif
+  unsigned int word_char : 1;
+} re_token_t;
+
+#define IS_EPSILON_NODE(type) ((type) & EPSILON_BIT)
+
+struct re_string_t
+{
+  /* Indicate the raw buffer which is the original string passed as an
+     argument of regexec(), re_search(), etc..  */
+  const unsigned char *raw_mbs;
+  /* Store the multibyte string.  In case of "case insensitive mode" like
+     REG_ICASE, upper cases of the string are stored, otherwise MBS points
+     the same address that RAW_MBS points.  */
+  unsigned char *mbs;
+#ifdef RE_ENABLE_I18N
+  /* Store the wide character string which is corresponding to MBS.  */
+  wint_t *wcs;
+  int *offsets;
+  mbstate_t cur_state;
+#endif
+  /* Index in RAW_MBS.  Each character mbs[i] corresponds to
+     raw_mbs[raw_mbs_idx + i].  */
+  int raw_mbs_idx;
+  /* The length of the valid characters in the buffers.  */
+  int valid_len;
+  /* The corresponding number of bytes in raw_mbs array.  */
+  int valid_raw_len;
+  /* The length of the buffers MBS and WCS.  */
+  int bufs_len;
+  /* The index in MBS, which is updated by re_string_fetch_byte.  */
+  int cur_idx;
+  /* length of RAW_MBS array.  */
+  int raw_len;
+  /* This is RAW_LEN - RAW_MBS_IDX + VALID_LEN - VALID_RAW_LEN.  */
+  int len;
+  /* End of the buffer may be shorter than its length in the cases such
+     as re_match_2, re_search_2.  Then, we use STOP for end of the buffer
+     instead of LEN.  */
+  int raw_stop;
+  /* This is RAW_STOP - RAW_MBS_IDX adjusted through OFFSETS.  */
+  int stop;
+
+  /* The context of mbs[0].  We store the context independently, since
+     the context of mbs[0] may be different from raw_mbs[0], which is
+     the beginning of the input string.  */
+  unsigned int tip_context;
+  /* The translation passed as a part of an argument of re_compile_pattern.  */
+  RE_TRANSLATE_TYPE trans;
+  /* Copy of re_dfa_t's word_char.  */
+  re_const_bitset_ptr_t word_char;
+  /* 1 if REG_ICASE.  */
+  unsigned char icase;
+  unsigned char is_utf8;
+  unsigned char map_notascii;
+  unsigned char mbs_allocated;
+  unsigned char offsets_needed;
+  unsigned char newline_anchor;
+  unsigned char word_ops_used;
+  int mb_cur_max;
+};
+typedef struct re_string_t re_string_t;
+
+
+struct re_dfa_t;
+typedef struct re_dfa_t re_dfa_t;
+
+#ifndef _LIBC
+# ifdef __i386__
+#  define internal_function   __attribute ((regparm (3), stdcall))
+# else
+#  define internal_function
+# endif
+#endif
+
+static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr,
+						int new_buf_len)
+     internal_function;
+#ifdef RE_ENABLE_I18N
+static void build_wcs_buffer (re_string_t *pstr) internal_function;
+static int build_wcs_upper_buffer (re_string_t *pstr) internal_function;
+#endif /* RE_ENABLE_I18N */
+static void build_upper_buffer (re_string_t *pstr) internal_function;
+static void re_string_translate_buffer (re_string_t *pstr) internal_function;
+static unsigned int re_string_context_at (const re_string_t *input, int idx,
+					  int eflags)
+     internal_function __attribute ((pure));
+#define re_string_peek_byte(pstr, offset) /* MBS byte at CUR_IDX + OFFSET.  */ \
+  ((pstr)->mbs[(pstr)->cur_idx + (offset)])
+#define re_string_fetch_byte(pstr) \
+  ((pstr)->mbs[(pstr)->cur_idx++])
+#define re_string_first_byte(pstr, idx) \
+  ((idx) == (pstr)->valid_len || (pstr)->wcs[idx] != WEOF)
+#define re_string_is_single_byte_char(pstr, idx) \
+  ((pstr)->wcs[idx] != WEOF && ((pstr)->valid_len == (idx) + 1 \
+				|| (pstr)->wcs[(idx) + 1] != WEOF))
+#define re_string_eoi(pstr) ((pstr)->stop <= (pstr)->cur_idx)
+#define re_string_cur_idx(pstr) ((pstr)->cur_idx)
+#define re_string_get_buffer(pstr) ((pstr)->mbs)
+#define re_string_length(pstr) ((pstr)->len)
+#define re_string_byte_at(pstr,idx) ((pstr)->mbs[idx])
+#define re_string_skip_bytes(pstr,idx) ((pstr)->cur_idx += (idx))
+#define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx))
+
+#ifdef __GNUC__
+# define alloca(size)   __builtin_alloca (size)
+# define HAVE_ALLOCA 1
+#elif defined(_MSC_VER)
+# include <malloc.h>
+# define alloca _alloca
+# define HAVE_ALLOCA 1
+#else
+# error No alloca()
+#endif
+
+#ifndef _LIBC
+# if HAVE_ALLOCA
+/* The OS usually guarantees only one guard page at the bottom of the stack,
+   and a page size can be as small as 4096 bytes.  So we cannot safely
+   allocate anything larger than 4096 bytes.  Also care for the possibility
+   of a few compiler-allocated temporary stack slots.  */
+#  define __libc_use_alloca(n) ((n) < 4032)
+# else
+/* alloca is implemented with malloc, so just use malloc.  */
+#  define __libc_use_alloca(n) 0
+# endif
+#endif
+
+#define re_malloc(t,n) ((t *) malloc ((n) * sizeof (t)))
+#define re_realloc(p,t,n) ((t *) realloc (p, (n) * sizeof (t)))
+#define re_free(p) free (p)
+
+struct bin_tree_t
+{
+  struct bin_tree_t *parent;
+  struct bin_tree_t *left;
+  struct bin_tree_t *right;
+  struct bin_tree_t *first;
+  struct bin_tree_t *next;
+
+  re_token_t token;
+
+  /* `node_idx' is the index in dfa->nodes, if `type' == 0.
+     Otherwise `type' indicate the type of this node.  */
+  int node_idx;
+};
+typedef struct bin_tree_t bin_tree_t;
+
+#define BIN_TREE_STORAGE_SIZE \
+  ((1024 - sizeof (void *)) / sizeof (bin_tree_t))
+
+struct bin_tree_storage_t
+{
+  struct bin_tree_storage_t *next;
+  bin_tree_t data[BIN_TREE_STORAGE_SIZE];
+};
+typedef struct bin_tree_storage_t bin_tree_storage_t;
+
+#define CONTEXT_WORD 1
+#define CONTEXT_NEWLINE (CONTEXT_WORD << 1)
+#define CONTEXT_BEGBUF (CONTEXT_NEWLINE << 1)
+#define CONTEXT_ENDBUF (CONTEXT_BEGBUF << 1)
+
+#define IS_WORD_CONTEXT(c) ((c) & CONTEXT_WORD)
+#define IS_NEWLINE_CONTEXT(c) ((c) & CONTEXT_NEWLINE)
+#define IS_BEGBUF_CONTEXT(c) ((c) & CONTEXT_BEGBUF)
+#define IS_ENDBUF_CONTEXT(c) ((c) & CONTEXT_ENDBUF)
+#define IS_ORDINARY_CONTEXT(c) ((c) == 0)
+
+#define IS_WORD_CHAR(ch) (isalnum (ch) || (ch) == '_')
+#define IS_NEWLINE(ch) ((ch) == NEWLINE_CHAR)
+#define IS_WIDE_WORD_CHAR(ch) (iswalnum (ch) || (ch) == L'_')
+#define IS_WIDE_NEWLINE(ch) ((ch) == WIDE_NEWLINE_CHAR)
+
+#define NOT_SATISFY_PREV_CONSTRAINT(constraint,context) /* Nonzero if CONTEXT violates a PREV_* bit of CONSTRAINT.  */ \
+ ((((constraint) & PREV_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
+  || (((constraint) & PREV_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
+  || (((constraint) & PREV_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context)) \
+  || (((constraint) & PREV_BEGBUF_CONSTRAINT) && !IS_BEGBUF_CONTEXT (context)))
+
+#define NOT_SATISFY_NEXT_CONSTRAINT(constraint,context) \
+ ((((constraint) & NEXT_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
+  || (((constraint) & NEXT_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
+  || (((constraint) & NEXT_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context)) \
+  || (((constraint) & NEXT_ENDBUF_CONSTRAINT) && !IS_ENDBUF_CONTEXT (context)))
+
+struct re_dfastate_t
+{
+  unsigned int hash;
+  re_node_set nodes;
+  re_node_set non_eps_nodes;
+  re_node_set inveclosure;
+  re_node_set *entrance_nodes;
+  struct re_dfastate_t **trtable, **word_trtable;
+  unsigned int context : 4;
+  unsigned int halt : 1;
+  /* If this state can accept `multi byte'.
+     Note that we refer to multibyte characters, and multi character
+     collating elements as `multi byte'.  */
+  unsigned int accept_mb : 1;
+  /* If this state has backreference node(s).  */
+  unsigned int has_backref : 1;
+  unsigned int has_constraint : 1;
+};
+typedef struct re_dfastate_t re_dfastate_t;
+
+struct re_state_table_entry
+{
+  int num;
+  int alloc;
+  re_dfastate_t **array;
+};
+
+/* Array type used in re_sub_match_last_t and re_sub_match_top_t.  */
+
+typedef struct
+{
+  int next_idx;
+  int alloc;
+  re_dfastate_t **array;
+} state_array_t;
+
+/* Store information about the node NODE whose type is OP_CLOSE_SUBEXP.  */
+
+typedef struct
+{
+  int node;
+  int str_idx; /* The position NODE match at.  */
+  state_array_t path;
+} re_sub_match_last_t;
+
+/* Store information about the node NODE whose type is OP_OPEN_SUBEXP.
+   And information about the node, whose type is OP_CLOSE_SUBEXP,
+   corresponding to NODE is stored in LASTS.  */
+
+typedef struct
+{
+  int str_idx;
+  int node;
+  state_array_t *path;
+  int alasts; /* Allocation size of LASTS.  */
+  int nlasts; /* The number of LASTS.  */
+  re_sub_match_last_t **lasts;
+} re_sub_match_top_t;
+
+struct re_backref_cache_entry
+{
+  int node;
+  int str_idx;
+  int subexp_from;
+  int subexp_to;
+  char more;
+  char unused;
+  unsigned short int eps_reachable_subexps_map;
+};
+
+typedef struct
+{
+  /* The string object corresponding to the input string.  */
+  re_string_t input;
+#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
+  const re_dfa_t *const dfa;
+#else
+  const re_dfa_t *dfa;
+#endif
+  /* EFLAGS of the argument of regexec.  */
+  int eflags;
+  /* Where the matching ends.  */
+  int match_last;
+  int last_node;
+  /* The state log used by the matcher.  */
+  re_dfastate_t **state_log;
+  int state_log_top;
+  /* Back reference cache.  */
+  int nbkref_ents;
+  int abkref_ents;
+  struct re_backref_cache_entry *bkref_ents;
+  int max_mb_elem_len;
+  int nsub_tops;
+  int asub_tops;
+  re_sub_match_top_t **sub_tops;
+} re_match_context_t;
+
+typedef struct
+{
+  re_dfastate_t **sifted_states;
+  re_dfastate_t **limited_states;
+  int last_node;
+  int last_str_idx;
+  re_node_set limits;
+} re_sift_context_t;
+
+struct re_fail_stack_ent_t
+{
+  int idx;
+  int node;
+  regmatch_t *regs;
+  re_node_set eps_via_nodes;
+};
+
+struct re_fail_stack_t
+{
+  int num;
+  int alloc;
+  struct re_fail_stack_ent_t *stack;
+};
+
+struct re_dfa_t
+{
+  re_token_t *nodes;
+  size_t nodes_alloc;
+  size_t nodes_len;
+  int *nexts;
+  int *org_indices;
+  re_node_set *edests;
+  re_node_set *eclosures;
+  re_node_set *inveclosures;
+  struct re_state_table_entry *state_table;
+  re_dfastate_t *init_state;
+  re_dfastate_t *init_state_word;
+  re_dfastate_t *init_state_nl;
+  re_dfastate_t *init_state_begbuf;
+  bin_tree_t *str_tree;
+  bin_tree_storage_t *str_tree_storage;
+  re_bitset_ptr_t sb_char;
+  int str_tree_storage_idx;
+
+  /* number of subexpressions `re_nsub' is in regex_t.  */
+  unsigned int state_hash_mask;
+  int init_node;
+  int nbackref; /* The number of backreference in this dfa.  */
+
+  /* Bitmap expressing which backreference is used.  */
+  bitset_word_t used_bkref_map;
+  bitset_word_t completed_bkref_map;
+
+  unsigned int has_plural_match : 1;
+  /* If this dfa has "multibyte node", which is a backreference or
+     a node which can accept multibyte character or multi character
+     collating element.  */
+  unsigned int has_mb_node : 1;
+  unsigned int is_utf8 : 1;
+  unsigned int map_notascii : 1;
+  unsigned int word_ops_used : 1;
+  int mb_cur_max;
+  bitset_t word_char;
+  reg_syntax_t syntax;
+  int *subexp_map;
+#ifdef DEBUG
+  char* re_str;
+#endif
+  __libc_lock_define (, lock)
+};
+
+#define re_node_set_init_empty(set) memset (set, '\0', sizeof (re_node_set))
+#define re_node_set_remove(set,id) \
+  (re_node_set_remove_at (set, re_node_set_contains (set, id) - 1))
+#define re_node_set_empty(p) ((p)->nelem = 0)
+#define re_node_set_free(set) re_free ((set)->elems)
+
+
+typedef enum
+{
+  SB_CHAR,
+  MB_CHAR,
+  EQUIV_CLASS,
+  COLL_SYM,
+  CHAR_CLASS
+} bracket_elem_type;
+
+typedef struct
+{
+  bracket_elem_type type;
+  union
+  {
+    unsigned char ch;
+    unsigned char *name;
+    wchar_t wch;
+  } opr;
+} bracket_elem_t;
+
+
+/* Inline functions for bitset operation.  */
+static inline void
+bitset_not (bitset_t set)
+{
+  int word = 0;		/* Complement SET one word at a time.  */
+  while (word < BITSET_WORDS)
+    { set[word] = ~set[word]; ++word; }
+}
+
+static inline void
+bitset_merge (bitset_t dest, const bitset_t src)
+{
+  int word = 0;		/* OR every word of SRC into DEST.  */
+  while (word < BITSET_WORDS)
+    { dest[word] |= src[word]; ++word; }
+}
+
+static inline void
+bitset_mask (bitset_t dest, const bitset_t src)
+{
+  int word = 0;		/* AND every word of SRC into DEST.  */
+  while (word < BITSET_WORDS)
+    { dest[word] &= src[word]; ++word; }
+}
+
+#ifdef RE_ENABLE_I18N
+/* Inline functions for re_string.  */
+static inline int
+internal_function __attribute ((pure))
+re_string_char_size_at (const re_string_t *pstr, int idx)
+{
+  int nbytes = 1;
+  /* Grow past each following WCS slot that holds WEOF.  */
+  if (pstr->mb_cur_max != 1)
+    while (idx + nbytes < pstr->valid_len
+           && pstr->wcs[idx + nbytes] == WEOF)
+      ++nbytes;
+  return nbytes;
+}
+
+static inline wint_t
+internal_function __attribute ((pure))
+re_string_wchar_at (const re_string_t *pstr, int idx)
+{
+  /* When mb_cur_max is 1 read the byte buffer MBS, otherwise WCS.  */
+  return (pstr->mb_cur_max == 1
+          ? (wint_t) pstr->mbs[idx] : (wint_t) pstr->wcs[idx]);
+}
+
+static int
+internal_function __attribute ((pure))
+re_string_elem_size_at (const re_string_t *pstr, int idx)
+{
+# ifdef _LIBC
+  const unsigned char *p, *extra;
+  const int32_t *table, *indirect;
+  int32_t tmp;
+#  include <locale/weight.h>
+  uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+
+  if (nrules != 0)
+    {
+      table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
+      extra = (const unsigned char *)
+	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
+      indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
+						_NL_COLLATE_INDIRECTMB);
+      p = pstr->mbs + idx;
+      tmp = findidx (&p);
+      return p - pstr->mbs - idx;
+    }
+  else
+# endif /* _LIBC */
+    return 1;
+}
+#endif /* RE_ENABLE_I18N */
+
+#endif /*  _REGEX_INTERNAL_H */
+
+/******************************************************************************/
+/******************************************************************************/
+/******************************************************************************/
+/* GKINCLUDE #include "regex_internal.c" */
+/******************************************************************************/
+/******************************************************************************/
+/******************************************************************************/
+/* Extended regular expression matching and search library.
+   Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+static void re_string_construct_common (const char *str, int len,
+					re_string_t *pstr,
+					RE_TRANSLATE_TYPE trans, int icase,
+					const re_dfa_t *dfa) internal_function;
+static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
+					  const re_node_set *nodes,
+					  unsigned int hash) internal_function;
+static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
+					  const re_node_set *nodes,
+					  unsigned int context,
+					  unsigned int hash) internal_function;
+
+/* Functions for string operations.  */
+
+/* Allocate the working buffers of PSTR for the LEN-byte string STR.  The
+   caller must run re_string_reconstruct before using the object.  */
+
+static reg_errcode_t
+internal_function
+re_string_allocate (re_string_t *pstr, const char *str, int len, int init_len,
+		    RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa)
+{
+  reg_errcode_t err;
+  int buf_len;
+
+  /* Room for one whole character, yet never more than LEN + 1 bytes.  */
+  buf_len = (init_len < dfa->mb_cur_max) ? dfa->mb_cur_max : init_len;
+  if (len + 1 < buf_len)
+    buf_len = len + 1;
+  re_string_construct_common (str, len, pstr, trans, icase, dfa);
+  err = re_string_realloc_buffers (pstr, buf_len);
+  if (BE (err != REG_NOERROR, 0))
+    return err;
+
+  pstr->word_char = dfa->word_char;
+  pstr->word_ops_used = dfa->word_ops_used;
+  if (!pstr->mbs_allocated)
+    pstr->mbs = (unsigned char *) str;
+  pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len;
+  pstr->valid_raw_len = pstr->valid_len;
+  return REG_NOERROR;
+}
+
+/* Allocate the buffers of PSTR for the string STR and initialize them.  */
+
+static reg_errcode_t
+internal_function
+re_string_construct (re_string_t *pstr, const char *str, int len,
+		     RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa)
+{
+  reg_errcode_t ret;
+  memset (pstr, '\0', sizeof (re_string_t));
+  re_string_construct_common (str, len, pstr, trans, icase, dfa);
+
+  if (len > 0)	/* An empty string needs no buffers.  */
+    {
+      ret = re_string_realloc_buffers (pstr, len + 1);
+      if (BE (ret != REG_NOERROR, 0))
+	return ret;
+    }
+  pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
+  /* Fill the buffers according to ICASE, DFA->mb_cur_max and TRANS.  */
+  if (icase)
+    {
+#ifdef RE_ENABLE_I18N
+      if (dfa->mb_cur_max > 1)
+	{
+	  while (1)
+	    {
+	      ret = build_wcs_upper_buffer (pstr);
+	      if (BE (ret != REG_NOERROR, 0))
+		return ret;
+	      if (pstr->valid_raw_len >= len)
+		break;	/* The whole string has been converted.  */
+	      if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
+		break;	/* Room for another character is still left.  */
+	      ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
+	      if (BE (ret != REG_NOERROR, 0))
+		return ret;
+	    }
+	}
+      else
+#endif /* RE_ENABLE_I18N  */
+	build_upper_buffer (pstr);
+    }
+  else
+    {
+#ifdef RE_ENABLE_I18N
+      if (dfa->mb_cur_max > 1)
+	build_wcs_buffer (pstr);
+      else
+#endif /* RE_ENABLE_I18N  */
+	{
+	  if (trans != NULL)
+	    re_string_translate_buffer (pstr);
+	  else
+	    {
+	      pstr->valid_len = pstr->bufs_len;	/* MBS is STR itself.  */
+	      pstr->valid_raw_len = pstr->bufs_len;
+	    }
+	}
+    }
+
+  return REG_NOERROR;
+}
+
+/* Helper of re_string_allocate/re_string_construct: resize PSTR's buffers.  */
+
+static reg_errcode_t
+internal_function
+re_string_realloc_buffers (re_string_t *pstr, int new_buf_len)
+{
+#ifdef RE_ENABLE_I18N
+  if (pstr->mb_cur_max > 1)
+    {
+      wint_t *wide = re_realloc (pstr->wcs, wint_t, new_buf_len);
+      if (BE (wide == NULL, 0))
+	return REG_ESPACE;
+      pstr->wcs = wide;
+    }
+  if (pstr->mb_cur_max > 1 && pstr->offsets != NULL)
+    {
+      int *map = re_realloc (pstr->offsets, int, new_buf_len);
+      if (BE (map == NULL, 0))
+	return REG_ESPACE;
+      pstr->offsets = map;
+    }
+#endif /* RE_ENABLE_I18N  */
+  if (pstr->mbs_allocated)
+    {
+      unsigned char *bytes;
+      bytes = re_realloc (pstr->mbs, unsigned char, new_buf_len);
+      if (BE (bytes == NULL, 0))
+	return REG_ESPACE;
+      pstr->mbs = bytes;
+    }
+  pstr->bufs_len = new_buf_len;
+  return REG_NOERROR;
+}
+
+/* Fill in the fields of PSTR that can be set without any allocation.  */
+static void
+internal_function
+re_string_construct_common (const char *str, int len, re_string_t *pstr,
+			    RE_TRANSLATE_TYPE trans, int icase,
+			    const re_dfa_t *dfa)
+{
+  /* Settings inherited from the compiled pattern.  */
+  pstr->mb_cur_max = dfa->mb_cur_max;
+  pstr->is_utf8 = dfa->is_utf8;
+  pstr->map_notascii = dfa->map_notascii;
+  /* A translated or case-folded string gets its own MBS buffer.  */
+  pstr->trans = trans;
+  pstr->icase = (icase != 0);
+  pstr->mbs_allocated = (icase || trans != NULL);
+  pstr->raw_mbs = (const unsigned char *) str;
+  pstr->raw_len = pstr->len = len;
+  pstr->raw_stop = pstr->stop = len;
+}
+
+#ifdef RE_ENABLE_I18N
+
+/* Build wide character buffer PSTR->WCS.
+   If the byte sequence of the string is:
+     <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
+   then the wide character buffer will be:
+     <wc1>   , WEOF    , <wc2>   , WEOF    , <wc3>
+   WEOF is used as padding; it indicates that the position isn't
+   the first byte of a multibyte character.
+
+   Note that this function assumes PSTR->VALID_LEN elements are already
+   built and starts from PSTR->VALID_LEN.  */
+
+static void
+internal_function
+build_wcs_buffer (re_string_t *pstr)
+{
+#ifdef _LIBC
+  unsigned char buf[MB_LEN_MAX];
+  assert (MB_LEN_MAX >= pstr->mb_cur_max);
+#else
+  unsigned char buf[64];
+#endif
+  mbstate_t prev_st;
+  int byte_idx, end_idx, remain_len;
+  size_t mbclen;
+
+  /* Build the buffers from pstr->valid_len to either pstr->len or
+     pstr->bufs_len.  */
+  end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
+  for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
+    {
+      wchar_t wc;
+      const char *p;
+
+      remain_len = end_idx - byte_idx;
+      prev_st = pstr->cur_state;
+      /* Apply the translation table, if any, to the bytes to convert.  */
+      if (BE (pstr->trans != NULL, 0))
+	{
+	  int i, ch;
+
+	  for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
+	    {
+	      ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
+	      buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];
+	    }
+	  p = (const char *) buf;
+	}
+      else
+	p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
+      mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
+      if (BE (mbclen == (size_t) -2, 0))
+	{
+	  /* Incomplete character within the remaining bytes; stop here.  */
+	  pstr->cur_state = prev_st;
+	  break;
+	}
+      else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
+	{
+	  /* Invalid sequence or NUL: treat it as a single-byte character.  */
+	  mbclen = 1;
+	  wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
+	  if (BE (pstr->trans != NULL, 0))
+	    wc = pstr->trans[wc];
+	  pstr->cur_state = prev_st;
+	}
+
+      /* Store the wide character at the position of its first byte.  */
+      pstr->wcs[byte_idx++] = wc;
+      /* Pad the positions of its remaining bytes with WEOF.  */
+      for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
+	pstr->wcs[byte_idx++] = WEOF;
+    }
+  pstr->valid_len = byte_idx;
+  pstr->valid_raw_len = byte_idx;
+}
+
+/* Build the wide character buffer PSTR->WCS like build_wcs_buffer, but
+   upper-casing the characters for REG_ICASE.  May return REG_ESPACE.  */
+
+static reg_errcode_t
+internal_function
+build_wcs_upper_buffer (re_string_t *pstr)
+{
+  mbstate_t prev_st;
+  int src_idx, byte_idx, end_idx, remain_len;
+  size_t mbclen;
+#ifdef _LIBC
+  char buf[MB_LEN_MAX];
+  assert (MB_LEN_MAX >= pstr->mb_cur_max);
+#else
+  char buf[64];
+#endif
+
+  byte_idx = pstr->valid_len;
+  end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
+
+  /* The following optimization assumes that ASCII characters can be
+     mapped to wide characters with a simple cast.  */
+  if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
+    {
+      while (byte_idx < end_idx)
+	{
+	  wchar_t wc;
+
+	  if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
+	      && mbsinit (&pstr->cur_state))
+	    {
+	      /* ASCII byte in the initial shift state: fold it directly.  */
+	      pstr->mbs[byte_idx]
+		= toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
+	      /* The next step uses the assumption that wchar_t is encoded
+		 ASCII-safe: all ASCII values can be converted like this.  */
+	      pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
+	      ++byte_idx;
+	      continue;
+	    }
+
+	  remain_len = end_idx - byte_idx;
+	  prev_st = pstr->cur_state;
+	  mbclen = mbrtowc (&wc,
+			    ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
+			     + byte_idx), remain_len, &pstr->cur_state);
+	  if (BE (mbclen + 2 > 2, 1))	/* Not 0, -1 or -2.  */
+	    {
+	      wchar_t wcu = wc;
+	      if (iswlower (wc))
+		{
+		  size_t mbcdlen;
+
+		  wcu = towupper (wc);
+		  mbcdlen = wcrtomb (buf, wcu, &prev_st);
+		  if (BE (mbclen == mbcdlen, 1))
+		    memcpy (pstr->mbs + byte_idx, buf, mbclen);
+		  else	/* The upper-case form has another length.  */
+		    {
+		      src_idx = byte_idx;
+		      goto offsets_needed;
+		    }
+		}
+	      else
+		memcpy (pstr->mbs + byte_idx,
+			pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
+	      pstr->wcs[byte_idx++] = wcu;
+	      /* Write paddings.  */
+	      for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
+		pstr->wcs[byte_idx++] = WEOF;
+	    }
+	  else if (mbclen == (size_t) -1 || mbclen == 0)
+	    {
+	      /* It is an invalid character or '\0'.  Just use the byte.  */
+	      int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
+	      pstr->mbs[byte_idx] = ch;
+	      /* And also cast it to wide char.  */
+	      pstr->wcs[byte_idx++] = (wchar_t) ch;
+	      if (BE (mbclen == (size_t) -1, 0))
+		pstr->cur_state = prev_st;
+	    }
+	  else
+	    {
+	      /* Incomplete character within the remaining bytes; stop.  */
+	      pstr->cur_state = prev_st;
+	      break;
+	    }
+	}
+      pstr->valid_len = byte_idx;
+      pstr->valid_raw_len = byte_idx;
+      return REG_NOERROR;
+    }
+  else
+    for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
+      {
+	wchar_t wc;
+	const char *p;
+      offsets_needed:
+	remain_len = end_idx - byte_idx;
+	prev_st = pstr->cur_state;
+	if (BE (pstr->trans != NULL, 0))
+	  {
+	    int i, ch;
+
+	    for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
+	      {
+		ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
+		buf[i] = pstr->trans[ch];
+	      }
+	    p = (const char *) buf;
+	  }
+	else
+	  p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
+	mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
+	if (BE (mbclen + 2 > 2, 1))
+	  {
+	    wchar_t wcu = wc;
+	    if (iswlower (wc))
+	      {
+		size_t mbcdlen;
+
+		wcu = towupper (wc);
+		mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
+		if (BE (mbclen == mbcdlen, 1))
+		  memcpy (pstr->mbs + byte_idx, buf, mbclen);
+		else if (mbcdlen != (size_t) -1)
+		  {
+		    size_t i;
+
+		    if (byte_idx + mbcdlen > pstr->bufs_len)
+		      {
+			pstr->cur_state = prev_st;
+			break;
+		      }
+		    /* Create the MBS -> raw index map on first use.  */
+		    if (pstr->offsets == NULL)
+		      {
+			pstr->offsets = re_malloc (int, pstr->bufs_len);
+
+			if (pstr->offsets == NULL)
+			  return REG_ESPACE;
+		      }
+		    if (!pstr->offsets_needed)
+		      {
+			for (i = 0; i < (size_t) byte_idx; ++i)
+			  pstr->offsets[i] = i;
+			pstr->offsets_needed = 1;
+		      }
+
+		    memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
+		    pstr->wcs[byte_idx] = wcu;
+		    pstr->offsets[byte_idx] = src_idx;
+		    for (i = 1; i < mbcdlen; ++i)
+		      {
+			pstr->offsets[byte_idx + i]
+			  = src_idx + (i < mbclen ? i : mbclen - 1);
+			pstr->wcs[byte_idx + i] = WEOF;
+		      }
+		    pstr->len += mbcdlen - mbclen;
+		    if (pstr->raw_stop > src_idx)
+		      pstr->stop += mbcdlen - mbclen;
+		    end_idx = (pstr->bufs_len > pstr->len)
+			      ? pstr->len : pstr->bufs_len;
+		    byte_idx += mbcdlen;
+		    src_idx += mbclen;
+		    continue;
+		  }
+                else
+                  memcpy (pstr->mbs + byte_idx, p, mbclen);
+	      }
+	    else
+	      memcpy (pstr->mbs + byte_idx, p, mbclen);
+
+	    if (BE (pstr->offsets_needed != 0, 0))
+	      {
+		size_t i;
+		for (i = 0; i < mbclen; ++i)
+		  pstr->offsets[byte_idx + i] = src_idx + i;
+	      }
+	    src_idx += mbclen;
+
+	    pstr->wcs[byte_idx++] = wcu;
+	    /* Write paddings.  */
+	    for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
+	      pstr->wcs[byte_idx++] = WEOF;
+	  }
+	else if (mbclen == (size_t) -1 || mbclen == 0)
+	  {
+	    /* It is an invalid character or '\0'.  Just use the byte.  */
+	    int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
+
+	    if (BE (pstr->trans != NULL, 0))
+	      ch = pstr->trans [ch];
+	    pstr->mbs[byte_idx] = ch;
+
+	    if (BE (pstr->offsets_needed != 0, 0))
+	      pstr->offsets[byte_idx] = src_idx;
+	    ++src_idx;
+
+	    /* And also cast it to wide char.  */
+	    pstr->wcs[byte_idx++] = (wchar_t) ch;
+	    if (BE (mbclen == (size_t) -1, 0))
+	      pstr->cur_state = prev_st;
+	  }
+	else
+	  {
+	    /* Incomplete character within the remaining bytes; stop.  */
+	    pstr->cur_state = prev_st;
+	    break;
+	  }
+      }
+  pstr->valid_len = byte_idx;
+  pstr->valid_raw_len = src_idx;
+  return REG_NOERROR;
+}
+
+/* Skip raw characters until the raw index reaches NEW_RAW_IDX.  Store the
+   last one skipped (WEOF if none) in *LAST_WC and return the index.  */
+
+static int
+internal_function
+re_string_skip_chars (re_string_t *pstr, int new_raw_idx, wint_t *last_wc)
+{
+  mbstate_t prev_st;
+  int rawbuf_idx;
+  size_t mbclen;
+  wchar_t wc = WEOF;
+
+  /* Skip the characters which are not necessary to check.  */
+  for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
+       rawbuf_idx < new_raw_idx;)
+    {
+      /* RAWBUF_IDX indexes RAW_MBS, so count against RAW_LEN, not LEN.  */
+      int remain_len = pstr->raw_len - rawbuf_idx;
+      prev_st = pstr->cur_state;
+      mbclen = mbrtowc (&wc, (const char *) pstr->raw_mbs + rawbuf_idx,
+			remain_len, &pstr->cur_state);
+      if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
+	{
+	  /* We treat these cases as a single byte character.  */
+	  if (mbclen == 0 || remain_len == 0)
+	    wc = L'\0';
+	  else
+	    wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx);
+	  mbclen = 1;
+	  pstr->cur_state = prev_st;
+	}
+      /* Then proceed to the next character.  */
+      rawbuf_idx += mbclen;
+    }
+  *last_wc = (wint_t) wc;
+  return rawbuf_idx;
+}
+#endif /* RE_ENABLE_I18N  */
+
+/* Build the buffer PSTR->MBS for REG_ICASE: translate each raw byte if a
+   translation table is set, then convert it to upper case.  */
+
+static void
+internal_function
+build_upper_buffer (re_string_t *pstr)
+{
+  const unsigned char *src = pstr->raw_mbs + pstr->raw_mbs_idx;
+  int limit = (pstr->len < pstr->bufs_len) ? pstr->len : pstr->bufs_len;
+  int pos = pstr->valid_len;
+
+  /* Continue right after the part that is already valid.  */
+  while (pos < limit)
+    {
+      int byte = src[pos];
+      if (BE (pstr->trans != NULL, 0))
+	byte = pstr->trans[byte];
+      pstr->mbs[pos] = islower (byte) ? toupper (byte) : byte;
+      ++pos;
+    }
+  pstr->valid_len = pos;
+  pstr->valid_raw_len = pos;
+}
+
+/* Fill PSTR->MBS with the raw bytes mapped through PSTR->TRANS.  */
+
+static void
+internal_function
+re_string_translate_buffer (re_string_t *pstr)
+{
+  const unsigned char *src = pstr->raw_mbs + pstr->raw_mbs_idx;
+  int limit = (pstr->len < pstr->bufs_len) ? pstr->len : pstr->bufs_len;
+  int pos;
+
+  /* Only the part not yet marked valid needs translating.  */
+  for (pos = pstr->valid_len; pos < limit; pos++)
+    pstr->mbs[pos] = pstr->trans[src[pos]];
+
+  /* Translation never changes the length: both counters agree.  */
+  pstr->valid_len = pos;
+  pstr->valid_raw_len = pos;
+}
+
+/* Re-construct the buffers of PSTR so that they begin at raw index IDX:
+   convert to wide characters if pstr->mb_cur_max > 1, to upper case for
+   REG_ICASE, and apply the translation.  May fail with REG_ESPACE.  */
+
+static reg_errcode_t
+internal_function
+re_string_reconstruct (re_string_t *pstr, int idx, int eflags)
+{
+  int offset = idx - pstr->raw_mbs_idx;
+  if (BE (offset < 0, 0))
+    {
+      /* IDX lies before the window: reset the buffer to the start.  */
+#ifdef RE_ENABLE_I18N
+      if (pstr->mb_cur_max > 1)
+	memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
+#endif /* RE_ENABLE_I18N */
+      pstr->len = pstr->raw_len;
+      pstr->stop = pstr->raw_stop;
+      pstr->valid_len = 0;
+      pstr->raw_mbs_idx = 0;
+      pstr->valid_raw_len = 0;
+      pstr->offsets_needed = 0;
+      pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
+			   : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
+      if (!pstr->mbs_allocated)
+	pstr->mbs = (unsigned char *) pstr->raw_mbs;
+      offset = idx;
+    }
+
+  if (BE (offset != 0, 1))
+    {
+      /* Should the already checked characters be kept?  */
+      if (BE (offset < pstr->valid_raw_len, 1))
+	{
+	  /* Yes, move them to the front of the buffer.  */
+#ifdef RE_ENABLE_I18N
+	  if (BE (pstr->offsets_needed, 0))
+	    {
+	      int low = 0, high = pstr->valid_len, mid;
+	      do
+		{
+		  mid = (high + low) / 2;
+		  if (pstr->offsets[mid] > offset)
+		    high = mid;
+		  else if (pstr->offsets[mid] < offset)
+		    low = mid + 1;
+		  else
+		    break;
+		}
+	      while (low < high);
+	      if (pstr->offsets[mid] < offset)
+		++mid;
+	      pstr->tip_context = re_string_context_at (pstr, mid - 1,
+							eflags);
+	      /* This can be quite complicated, so handle specially
+		 only the common and easy case where the character with
+		 different length representation of lower and upper
+		 case is present at or after offset.  */
+	      if (pstr->valid_len > offset
+		  && mid == offset && pstr->offsets[mid] == offset)
+		{
+		  memmove (pstr->wcs, pstr->wcs + offset,
+			   (pstr->valid_len - offset) * sizeof (wint_t));
+		  memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset);
+		  pstr->valid_len -= offset;
+		  pstr->valid_raw_len -= offset;
+		  for (low = 0; low < pstr->valid_len; low++)
+		    pstr->offsets[low] = pstr->offsets[low + offset] - offset;
+		}
+	      else
+		{
+		  /* Otherwise, just find out how long the partial multibyte
+		     character at offset is and fill it with WEOF/255.  */
+		  pstr->len = pstr->raw_len - idx + offset;
+		  pstr->stop = pstr->raw_stop - idx + offset;
+		  pstr->offsets_needed = 0;
+		  while (mid > 0 && pstr->offsets[mid - 1] == offset)
+		    --mid;
+		  while (mid < pstr->valid_len)
+		    if (pstr->wcs[mid] != WEOF)
+		      break;
+		    else
+		      ++mid;
+		  if (mid == pstr->valid_len)
+		    pstr->valid_len = 0;
+		  else
+		    {
+		      pstr->valid_len = pstr->offsets[mid] - offset;
+		      if (pstr->valid_len)
+			{
+			  for (low = 0; low < pstr->valid_len; ++low)
+			    pstr->wcs[low] = WEOF;
+			  memset (pstr->mbs, 255, pstr->valid_len);
+			}
+		    }
+		  pstr->valid_raw_len = pstr->valid_len;
+		}
+	    }
+	  else
+#endif
+	    {
+	      pstr->tip_context = re_string_context_at (pstr, offset - 1,
+							eflags);
+#ifdef RE_ENABLE_I18N
+	      if (pstr->mb_cur_max > 1)
+		memmove (pstr->wcs, pstr->wcs + offset,
+			 (pstr->valid_len - offset) * sizeof (wint_t));
+#endif /* RE_ENABLE_I18N */
+	      if (BE (pstr->mbs_allocated, 0))
+		memmove (pstr->mbs, pstr->mbs + offset,
+			 pstr->valid_len - offset);
+	      pstr->valid_len -= offset;
+	      pstr->valid_raw_len -= offset;
+#if DEBUG
+	      assert (pstr->valid_len > 0);
+#endif
+	    }
+	}
+      else
+	{
+	  /* No, skip all characters until IDX.  */
+	  int prev_valid_len = pstr->valid_len;
+
+#ifdef RE_ENABLE_I18N
+	  if (BE (pstr->offsets_needed, 0))
+	    {
+	      pstr->len = pstr->raw_len - idx + offset;
+	      pstr->stop = pstr->raw_stop - idx + offset;
+	      pstr->offsets_needed = 0;
+	    }
+#endif
+	  pstr->valid_len = 0;
+#ifdef RE_ENABLE_I18N
+	  if (pstr->mb_cur_max > 1)
+	    {
+	      int wcs_idx;
+	      wint_t wc = WEOF;
+
+	      if (pstr->is_utf8)
+		{
+		  const unsigned char *raw, *p, *q, *end;
+
+		  /* Special case UTF-8.  Multi-byte chars start with any
+		     byte other than 0x80 - 0xbf.  */
+		  raw = pstr->raw_mbs + pstr->raw_mbs_idx;
+		  end = raw + (offset - pstr->mb_cur_max);
+		  if (end < pstr->raw_mbs)
+		    end = pstr->raw_mbs;
+		  p = raw + offset - 1;
+#ifdef _LIBC
+		  /* We know the wchar_t encoding is UCS4, so for the simple
+		     case, ASCII characters, skip the conversion step.  */
+		  if (isascii (*p) && BE (pstr->trans == NULL, 1))
+		    {
+		      memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
+		      /* pstr->valid_len = 0; */
+		      wc = (wchar_t) *p;
+		    }
+		  else
+#endif
+		    for (; p >= end; --p)
+		      if ((*p & 0xc0) != 0x80)
+			{
+			  mbstate_t cur_state;
+			  wchar_t wc2;
+			  int mlen = raw + pstr->len - p;
+			  unsigned char buf[6];
+			  size_t mbclen;
+
+			  q = p;
+			  if (BE (pstr->trans != NULL, 0))
+			    {
+			      int i = mlen < 6 ? mlen : 6;
+			      while (--i >= 0)
+				buf[i] = pstr->trans[p[i]];
+			      q = buf;
+			    }
+			  /* Decode via Q so that TRANS is honoured.  XXX mbrtowc
+			     is overkill: the conversion is UTF-8 -> UCS4.  */
+			  memset (&cur_state, 0, sizeof (cur_state));
+			  mbclen = mbrtowc (&wc2, (const char *) q, mlen,
+					    &cur_state);
+			  if (raw + offset - p <= mbclen
+			      && mbclen < (size_t) -2)
+			    {
+			      memset (&pstr->cur_state, '\0',
+				      sizeof (mbstate_t));
+			      pstr->valid_len = mbclen - (raw + offset - p);
+			      wc = wc2;
+			    }
+			  break;
+			}
+		}
+
+	      if (wc == WEOF)
+		pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
+	      if (wc == WEOF)
+		pstr->tip_context
+		  = re_string_context_at (pstr, prev_valid_len - 1, eflags);
+	      else
+		pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
+				      && IS_WIDE_WORD_CHAR (wc))
+				     ? CONTEXT_WORD
+				     : ((IS_WIDE_NEWLINE (wc)
+					 && pstr->newline_anchor)
+					? CONTEXT_NEWLINE : 0));
+	      if (BE (pstr->valid_len, 0))
+		{
+		  for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
+		    pstr->wcs[wcs_idx] = WEOF;
+		  if (pstr->mbs_allocated)
+		    memset (pstr->mbs, 255, pstr->valid_len);
+		}
+	      pstr->valid_raw_len = pstr->valid_len;
+	    }
+	  else
+#endif /* RE_ENABLE_I18N */
+	    {
+	      int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
+	      pstr->valid_raw_len = 0;
+	      if (pstr->trans)
+		c = pstr->trans[c];
+	      pstr->tip_context = (bitset_contain (pstr->word_char, c)
+				   ? CONTEXT_WORD
+				   : ((IS_NEWLINE (c) && pstr->newline_anchor)
+				      ? CONTEXT_NEWLINE : 0));
+	    }
+	}
+      if (!BE (pstr->mbs_allocated, 0))
+	pstr->mbs += offset;
+    }
+  pstr->raw_mbs_idx = idx;
+  pstr->len -= offset;
+  pstr->stop -= offset;
+
+  /* Then build the buffers.  */
+#ifdef RE_ENABLE_I18N
+  if (pstr->mb_cur_max > 1)
+    {
+      if (pstr->icase)
+	{
+	  reg_errcode_t ret = build_wcs_upper_buffer (pstr);
+	  if (BE (ret != REG_NOERROR, 0))
+	    return ret;
+	}
+      else
+	build_wcs_buffer (pstr);
+    }
+  else
+#endif /* RE_ENABLE_I18N */
+    if (BE (pstr->mbs_allocated, 0))
+      {
+	if (pstr->icase)
+	  build_upper_buffer (pstr);
+	else if (pstr->trans != NULL)
+	  re_string_translate_buffer (pstr);
+      }
+    else
+      pstr->valid_len = pstr->len;
+
+  pstr->cur_idx = 0;
+  return REG_NOERROR;
+}
+
+static unsigned char
+internal_function __attribute ((pure))
+re_string_peek_byte_case (const re_string_t *pstr, int idx)
+{
+  int ch, off;
+  /* Peek at the byte IDX positions past the cursor, taken from the raw
+     input where possible.  Handle the common (easiest) cases first.  */
+  if (BE (!pstr->mbs_allocated, 1))
+    return re_string_peek_byte (pstr, idx);
+
+#ifdef RE_ENABLE_I18N
+  if (pstr->mb_cur_max > 1
+      && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
+    return re_string_peek_byte (pstr, idx);
+#endif
+  /* Locate the same position in the raw input.  */
+  off = pstr->cur_idx + idx;
+#ifdef RE_ENABLE_I18N
+  if (pstr->offsets_needed)
+    off = pstr->offsets[off];
+#endif
+
+  ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
+
+#ifdef RE_ENABLE_I18N
+  /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
+     this function returns CAPITAL LETTER I instead of first byte of
+     DOTLESS SMALL LETTER I.  The latter would confuse the parser,
+     since peek_byte_case doesn't advance cur_idx in any way.  */
+  if (pstr->offsets_needed && !isascii (ch))
+    return re_string_peek_byte (pstr, idx);
+#endif
+
+  return ch;
+}
+
+static unsigned char
+internal_function /* Not pure: this function advances PSTR->cur_idx.  */
+re_string_fetch_byte_case (re_string_t *pstr)
+{
+  if (BE (!pstr->mbs_allocated, 1))
+    return re_string_fetch_byte (pstr);
+
+#ifdef RE_ENABLE_I18N
+  if (pstr->offsets_needed)
+    {
+      int off, ch;
+
+      /* For tr_TR.UTF-8 [[:islower:]] there is
+	 [[: CAPITAL LETTER I WITH DOT lower:]] in mbs.  Skip
+	 in that case the whole multi-byte character and return
+	 the original letter.  On the other side, with
+	 [[: DOTLESS SMALL LETTER I return [[:I, as doing
+	 anything else would complicate things too much.  */
+
+      if (!re_string_first_byte (pstr, pstr->cur_idx))
+	return re_string_fetch_byte (pstr);
+
+      off = pstr->offsets[pstr->cur_idx];
+      ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
+
+      if (! isascii (ch))
+	return re_string_fetch_byte (pstr);
+
+      re_string_skip_bytes (pstr,
+			    re_string_char_size_at (pstr, pstr->cur_idx));
+      return ch;
+    }
+#endif
+  /* Without an offset map, buffer and raw positions coincide.  */
+  return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
+}
+
+static void
+internal_function
+re_string_destruct (re_string_t *pstr)
+{
+  if (pstr->mbs_allocated)	/* Otherwise MBS is not owned by PSTR.  */
+    re_free (pstr->mbs);
+#ifdef RE_ENABLE_I18N
+  re_free (pstr->offsets);
+  re_free (pstr->wcs);
+#endif /* RE_ENABLE_I18N  */
+}
+
+/* Return the context (a set of CONTEXT_* flags) at IDX in INPUT.  */
+
+static unsigned int
+internal_function
+re_string_context_at (const re_string_t *input, int idx, int eflags)
+{
+  int c;
+  if (BE (idx < 0, 0))
+    /* In this case, we use the value stored in input->tip_context,
+       since we can't know the character in input->mbs[-1] here.  */
+    return input->tip_context;
+  if (BE (idx == input->len, 0))	/* One past the last character.  */
+    return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
+	    : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
+#ifdef RE_ENABLE_I18N
+  if (input->mb_cur_max > 1)
+    {
+      wint_t wc;
+      int wc_idx = idx;
+      while(input->wcs[wc_idx] == WEOF)	/* Step back to the first byte.  */
+	{
+#ifdef DEBUG
+	  /* It must not happen.  */
+	  assert (wc_idx >= 0);
+#endif
+	  --wc_idx;
+	  if (wc_idx < 0)
+	    return input->tip_context;
+	}
+      wc = input->wcs[wc_idx];
+      if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc))
+	return CONTEXT_WORD;
+      return (IS_WIDE_NEWLINE (wc) && input->newline_anchor
+	      ? CONTEXT_NEWLINE : 0);
+    }
+  else
+#endif
+    {
+      c = re_string_byte_at (input, idx);
+      if (bitset_contain (input->word_char, c))
+	return CONTEXT_WORD;
+      return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0;
+    }
+}
+
+/* Functions for set operations.  */
+
+static reg_errcode_t
+internal_function
+re_node_set_alloc (re_node_set *set, int size)
+{
+  int *storage = re_malloc (int, size);
+
+  /* Record the requested capacity even if the allocation failed.  */
+  set->alloc = size;
+  set->nelem = 0;
+  set->elems = storage;
+  return BE (storage == NULL, 0) ? REG_ESPACE : REG_NOERROR;
+}
+
+static reg_errcode_t
+internal_function
+re_node_set_init_1 (re_node_set *set, int elem)
+{
+  int *slot = re_malloc (int, 1);
+  int count = (slot != NULL);
+
+  /* A failed allocation leaves SET empty: alloc == nelem == 0.  */
+  set->elems = slot;
+  set->alloc = set->nelem = count;
+  if (BE (slot == NULL, 0))
+    return REG_ESPACE;
+  *slot = elem;
+  return REG_NOERROR;
+}
+
+static reg_errcode_t
+internal_function
+re_node_set_init_2 (re_node_set *set, int elem1, int elem2)
+{
+  int lo, hi;
+  int *pair;
+
+  set->alloc = 2;
+  pair = re_malloc (int, 2);
+  set->elems = pair;
+  if (BE (pair == NULL, 0))
+    return REG_ESPACE;
+
+  /* Order the two candidates so that the stored set is ascending.  */
+  if (elem1 < elem2)
+    lo = elem1, hi = elem2;
+  else
+    lo = elem2, hi = elem1;
+
+  pair[0] = lo;
+  if (lo == hi)
+    set->nelem = 1;
+  else
+    {
+      pair[1] = hi;
+      set->nelem = 2;
+    }
+  return REG_NOERROR;
+}
+
+static reg_errcode_t
+internal_function
+re_node_set_init_copy (re_node_set *dest, const re_node_set *src)
+{
+  int count = dest->nelem = src->nelem;
+  if (count <= 0)
+    {
+      re_node_set_init_empty (dest);
+      return REG_NOERROR;
+    }
+  dest->alloc = count;
+  dest->elems = re_malloc (int, count);
+  if (BE (dest->elems == NULL, 0))
+    {
+      dest->alloc = dest->nelem = 0;
+      return REG_ESPACE;
+    }
+  memcpy (dest->elems, src->elems, count * sizeof (int));
+  return REG_NOERROR;
+}
+
+/* Calculate the intersection of the sets SRC1 and SRC2, and merge it into
+   DEST.  The return value is an error code, or REG_NOERROR on success.
+   Note: We assume dest->elems is NULL, when dest->alloc is 0.  */
+
+static reg_errcode_t
+internal_function
+re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1,
+			   const re_node_set *src2)
+{
+  int i1, i2, is, id, delta, sbase;
+  if (src1->nelem == 0 || src2->nelem == 0)
+    return REG_NOERROR;
+
+  /* We need dest->nelem + 2 * elems_in_intersection; this is a
+     conservative estimate.  */
+  if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
+    {
+      int new_alloc = src1->nelem + src2->nelem + dest->alloc;
+      int *new_elems = re_realloc (dest->elems, int, new_alloc);
+      if (BE (new_elems == NULL, 0))
+        return REG_ESPACE;
+      dest->elems = new_elems;
+      dest->alloc = new_alloc;
+    }
+
+  /* Find the items in the intersection of SRC1 and SRC2, and copy
+     into the top of DEST those that are not already in DEST itself.  */
+  sbase = dest->nelem + src1->nelem + src2->nelem;
+  i1 = src1->nelem - 1;
+  i2 = src2->nelem - 1;
+  id = dest->nelem - 1;
+  for (;;)
+    {
+      if (src1->elems[i1] == src2->elems[i2])
+	{
+	  /* Try to find the item in DEST.  Maybe we could binary search?  */
+	  while (id >= 0 && dest->elems[id] > src1->elems[i1])
+	    --id;
+	  /* Not in DEST yet: stage it at the top of the buffer.  */
+          if (id < 0 || dest->elems[id] != src1->elems[i1])
+            dest->elems[--sbase] = src1->elems[i1];
+
+	  if (--i1 < 0 || --i2 < 0)
+	    break;
+	}
+
+      /* Lower the highest of the two items.  */
+      else if (src1->elems[i1] < src2->elems[i2])
+	{
+	  if (--i2 < 0)
+	    break;
+	}
+      else
+	{
+	  if (--i1 < 0)
+	    break;
+	}
+    }
+
+  id = dest->nelem - 1;
+  is = dest->nelem + src1->nelem + src2->nelem - 1;
+  delta = is - sbase + 1;	/* Number of items staged at the top.  */
+
+  /* Now copy.  When DELTA becomes zero, the remaining
+     DEST elements are already in place; this is more or
+     less the same loop that is in re_node_set_merge.  */
+  dest->nelem += delta;
+  if (delta > 0 && id >= 0)
+    for (;;)
+      {
+        if (dest->elems[is] > dest->elems[id])
+          {
+            /* Copy from the top.  */
+            dest->elems[id + delta--] = dest->elems[is--];
+            if (delta == 0)
+              break;
+          }
+        else
+          {
+            /* Slide from the bottom.  */
+            dest->elems[id + delta] = dest->elems[id];
+            if (--id < 0)
+              break;
+          }
+      }
+
+  /* Copy remaining SRC elements.  */
+  memcpy (dest->elems, dest->elems + sbase, delta * sizeof (int));
+
+  return REG_NOERROR;
+}
+
+/* Initialize DEST to the union of SRC1 and SRC2, either of which may be
+   NULL or empty.  Returns REG_NOERROR on success, else an error code.  */
+
+static reg_errcode_t
+internal_function
+re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
+			const re_node_set *src2)
+{
+  int pos1 = 0, pos2 = 0, out = 0;
+  int rest;
+  int have1 = (src1 != NULL && src1->nelem > 0);
+  int have2 = (src2 != NULL && src2->nelem > 0);
+
+  /* With at most one non-empty operand the union is a copy of it.  */
+  if (!have1 && !have2)
+    {
+      re_node_set_init_empty (dest);
+      return REG_NOERROR;
+    }
+  if (!have1 || !have2)
+    return re_node_set_init_copy (dest, have1 ? src1 : src2);
+
+  dest->alloc = src1->nelem + src2->nelem;
+  dest->elems = re_malloc (int, dest->alloc);
+  if (BE (dest->elems == NULL, 0))
+    return REG_ESPACE;
+
+  /* Merge the two ordered inputs; a common element is stored once.  */
+  while (pos1 < src1->nelem && pos2 < src2->nelem)
+    {
+      int a = src1->elems[pos1], b = src2->elems[pos2];
+      dest->elems[out++] = (a > b) ? b : a;
+      pos1 += (a <= b);
+      pos2 += (a >= b);
+    }
+
+  /* At most one input has elements left; append them unchanged.  */
+  if (pos1 < src1->nelem)
+    {
+      rest = src1->nelem - pos1;
+      memcpy (dest->elems + out, src1->elems + pos1, rest * sizeof (int));
+      out += rest;
+    }
+  else if (pos2 < src2->nelem)
+    {
+      rest = src2->nelem - pos2;
+      memcpy (dest->elems + out, src2->elems + pos2, rest * sizeof (int));
+      out += rest;
+    }
+  dest->nelem = out;
+  return REG_NOERROR;
+}
+
+/* Calculate the union of the sets DEST and SRC and store it in DEST.
+   Returns REG_NOERROR on success (including when SRC is NULL or empty,
+   in which case DEST is left untouched) or REG_ESPACE when DEST's buffer
+   cannot be grown; on that failure DEST is not modified.
+   The comparisons below rely on both sets being sorted in ascending
+   order.  */
+
+static reg_errcode_t
+internal_function
+re_node_set_merge (re_node_set *dest, const re_node_set *src)
+{
+  int is, id, sbase, delta;
+  if (src == NULL || src->nelem == 0)
+    return REG_NOERROR;
+  /* Ensure room for DEST's elements plus 2 * SRC->nelem extra slots; the
+     slots at the top of that range are used as scratch space below.  */
+  if (dest->alloc < 2 * src->nelem + dest->nelem)
+    {
+      int new_alloc = 2 * (src->nelem + dest->alloc);
+      int *new_buffer = re_realloc (dest->elems, int, new_alloc);
+      if (BE (new_buffer == NULL, 0))
+	return REG_ESPACE;
+      dest->elems = new_buffer;
+      dest->alloc = new_alloc;
+    }
+
+  /* Empty DEST: the union is simply a copy of SRC.  */
+  if (BE (dest->nelem == 0, 0))
+    {
+      dest->nelem = src->nelem;
+      memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
+      return REG_NOERROR;
+    }
+
+  /* Copy into the top of DEST the items of SRC that are not
+     found in DEST.  Maybe we could binary search in DEST?  */
+  /* SBASE starts one past the end of the scratch area and moves down as
+     items are stored, so the scratch items end up in ascending order in
+     dest->elems[sbase ..].  Both sets are walked from their last
+     element towards the first.  */
+  for (sbase = dest->nelem + 2 * src->nelem,
+       is = src->nelem - 1, id = dest->nelem - 1; is >= 0 && id >= 0; )
+    {
+      if (dest->elems[id] == src->elems[is])
+        is--, id--;
+      else if (dest->elems[id] < src->elems[is])
+        dest->elems[--sbase] = src->elems[is--];
+      else /* if (dest->elems[id] > src->elems[is]) */
+        --id;
+    }
+
+  if (is >= 0)
+    {
+      /* If DEST is exhausted, the remaining items of SRC must be unique.  */
+      sbase -= is + 1;
+      memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (int));
+    }
+
+  /* Re-aim the cursors for the second pass: ID at the last original DEST
+     element, IS at the last scratch element.  DELTA is the number of
+     scratch items, i.e. the number of elements DEST gains.  */
+  id = dest->nelem - 1;
+  is = dest->nelem + 2 * src->nelem - 1;
+  delta = is - sbase + 1;
+  if (delta == 0)
+    return REG_NOERROR;
+
+  /* Now copy.  When DELTA becomes zero, the remaining
+     DEST elements are already in place.  */
+  /* Backward merge: the write position is always ID + DELTA, so each
+     step either drops the largest pending scratch item there or shifts
+     the current DEST element up by DELTA.  */
+  dest->nelem += delta;
+  for (;;)
+    {
+      if (dest->elems[is] > dest->elems[id])
+        {
+	  /* Copy from the top.  */
+          dest->elems[id + delta--] = dest->elems[is--];
+	  if (delta == 0)
+	    break;
+	}
+      else
+        {
+          /* Slide from the bottom.  */
+          dest->elems[id + delta] = dest->elems[id];
+	  if (--id < 0)
+	    {
+	      /* Copy remaining SRC elements.  */
+	      memcpy (dest->elems, dest->elems + sbase,
+	              delta * sizeof (int));
+	      break;
+	    }
+	}
+    }
+
+  return REG_NOERROR;
+}
+
+/* Insert the new element ELEM into the sorted node set SET.
+   SET must not already contain ELEM.
+   Returns 1 on success and -1 if memory could not be obtained.  On
+   failure SET is left unchanged: in particular set->alloc is only
+   updated once the reallocation has succeeded, so a failed call never
+   leaves SET advertising more capacity than its buffer really has.  */
+
+static int
+internal_function
+re_node_set_insert (re_node_set *set, int elem)
+{
+  int idx;
+  /* In case the set has no buffer yet, create a one-element set.  */
+  if (set->alloc == 0)
+    {
+      if (BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1))
+        return 1;
+      else
+        return -1;
+    }
+
+  if (BE (set->nelem, 0) == 0)
+    {
+      /* We already guaranteed above that set->alloc != 0.  */
+      set->elems[0] = elem;
+      ++set->nelem;
+      return 1;
+    }
+
+  /* Grow the buffer if it is full.  The new capacity is committed only
+     after realloc succeeded; otherwise a later insertion would skip the
+     reallocation and write past the end of the old buffer.  */
+  if (set->alloc == set->nelem)
+    {
+      int new_alloc = set->alloc * 2;
+      int *new_elems = re_realloc (set->elems, int, new_alloc);
+      if (BE (new_elems == NULL, 0))
+        return -1;
+      set->elems = new_elems;
+      set->alloc = new_alloc;
+    }
+
+  /* Move the elements which follow the new element.  Test the
+     first element separately to skip a check in the inner loop.  */
+  if (elem < set->elems[0])
+    {
+      /* ELEM becomes the smallest element: shift everything up by one.  */
+      for (idx = set->nelem; idx > 0; idx--)
+        set->elems[idx] = set->elems[idx - 1];
+    }
+  else
+    {
+      /* set->elems[0] <= ELEM acts as a sentinel, so IDX never reaches 0
+         and no lower-bound test is needed here.  */
+      for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
+        set->elems[idx] = set->elems[idx - 1];
+    }
+
+  /* Insert the new element.  */
+  set->elems[idx] = elem;
+  ++set->nelem;
+  return 1;
+}
+
+/* Append the new element ELEM to the end of the node set SET.
+   SET must not already contain any element greater than or equal to
+   ELEM, so that appending keeps the set sorted.
+   Returns 1 on success and -1 if memory could not be obtained; on
+   failure SET (including set->alloc) is left unchanged.  */
+
+static int
+internal_function
+re_node_set_insert_last (re_node_set *set, int elem)
+{
+  /* Grow the buffer if it is full.  The "+ 1" makes growth work for a
+     set whose capacity is still zero.  set->alloc is committed only after
+     realloc succeeded, so a failed call cannot leave SET claiming
+     capacity that its buffer does not have.  */
+  if (set->alloc == set->nelem)
+    {
+      int new_alloc = (set->alloc + 1) * 2;
+      int *new_elems = re_realloc (set->elems, int, new_alloc);
+      if (BE (new_elems == NULL, 0))
+        return -1;
+      set->elems = new_elems;
+      set->alloc = new_alloc;
+    }
+
+  /* Insert the new element.  */
+  set->elems[set->nelem++] = elem;
+  return 1;
+}
+
+/* Test the node sets SET1 and SET2 for equality.
+   Returns 1 when both are non-NULL, have the same number of elements and
+   hold the same value at every position; returns 0 otherwise (a NULL
+   argument never compares equal, not even to another NULL).  */
+
+static int
+internal_function __attribute ((pure))
+re_node_set_compare (const re_node_set *set1, const re_node_set *set2)
+{
+  int pos;
+
+  if (set1 == NULL || set2 == NULL)
+    return 0;
+  if (set1->nelem != set2->nelem)
+    return 0;
+
+  /* Walk from the last element down to the first.  */
+  pos = set1->nelem;
+  while (pos-- > 0)
+    {
+      if (set1->elems[pos] != set2->elems[pos])
+        return 0;
+    }
+  return 1;
+}
+
+/* Look ELEM up in the node set SET by binary search (the search relies
+   on the elements being in ascending order).
+   Returns the 1-based position of ELEM when it is present and 0 when it
+   is not, so the result can be used directly as a truth value.  */
+
+static int
+internal_function __attribute ((pure))
+re_node_set_contains (const re_node_set *set, int elem)
+{
+  unsigned int low, high;
+
+  if (set->nelem <= 0)
+    return 0;
+
+  /* Narrow [low, high] down to the first slot whose value is >= ELEM.  */
+  low = 0;
+  high = set->nelem - 1;
+  while (low < high)
+    {
+      unsigned int middle = (low + high) / 2;
+      if (set->elems[middle] >= elem)
+        high = middle;
+      else
+        low = middle + 1;
+    }
+
+  if (set->elems[low] != elem)
+    return 0;
+  return low + 1;
+}
+
+/* Remove the element at position IDX from the node set SET, closing the
+   gap by moving every later element down one slot.  An IDX outside
+   [0, set->nelem) is ignored.  The buffer is not shrunk.  */
+static void
+internal_function
+re_node_set_remove_at (re_node_set *set, int idx)
+{
+  int new_count;
+
+  if (idx < 0 || idx >= set->nelem)
+    return;
+
+  new_count = --set->nelem;
+  while (idx < new_count)
+    {
+      set->elems[idx] = set->elems[idx + 1];
+      ++idx;
+    }
+}
+
+
+/* Add the token TOKEN to dfa->nodes and return the index of the new node,
+   or -1 if an error occurred (size overflow or memory exhaustion).
+
+   When the node arrays are full, all five parallel arrays (nodes, nexts,
+   org_indices, edests, eclosures) are doubled.  Every array that was
+   reallocated successfully is stored back into DFA immediately: realloc
+   may already have released the old block, so keeping the old pointer
+   after a partial failure would leave DFA with a dangling pointer and
+   leak the new block.  dfa->nodes_alloc is raised only when all arrays
+   have been grown, so on failure DFA stays consistent (every array holds
+   at least nodes_alloc entries) and can still be freed normally.
+
+   The new node gets a zero constraint, no successor (nexts == -1) and
+   empty edests/eclosures sets.  */
+
+static int
+internal_function
+re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
+{
+  if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
+    {
+      size_t new_nodes_alloc = dfa->nodes_alloc * 2;
+      size_t max_elem_size = sizeof (re_token_t);
+      int *new_nexts, *new_indices;
+      re_node_set *new_edests, *new_eclosures;
+      re_token_t *new_nodes;
+
+      if (max_elem_size < sizeof (re_node_set))
+        max_elem_size = sizeof (re_node_set);
+      if (max_elem_size < sizeof (int))
+        max_elem_size = sizeof (int);
+
+      /* Avoid overflows, both in the element count itself and in the
+         byte size that the reallocations below compute from it.  */
+      if (BE (new_nodes_alloc < dfa->nodes_alloc
+              || new_nodes_alloc > (size_t) -1 / max_elem_size, 0))
+        return -1;
+
+      new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
+      if (BE (new_nodes == NULL, 0))
+        return -1;
+      dfa->nodes = new_nodes;
+
+      /* Commit each successful reallocation right away (see above).  */
+      new_nexts = re_realloc (dfa->nexts, int, new_nodes_alloc);
+      if (new_nexts != NULL)
+        dfa->nexts = new_nexts;
+      new_indices = re_realloc (dfa->org_indices, int, new_nodes_alloc);
+      if (new_indices != NULL)
+        dfa->org_indices = new_indices;
+      new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
+      if (new_edests != NULL)
+        dfa->edests = new_edests;
+      new_eclosures = re_realloc (dfa->eclosures, re_node_set,
+                                  new_nodes_alloc);
+      if (new_eclosures != NULL)
+        dfa->eclosures = new_eclosures;
+
+      if (BE (new_nexts == NULL || new_indices == NULL
+              || new_edests == NULL || new_eclosures == NULL, 0))
+        return -1;
+      dfa->nodes_alloc = new_nodes_alloc;
+    }
+  dfa->nodes[dfa->nodes_len] = token;
+  dfa->nodes[dfa->nodes_len].constraint = 0;
+#ifdef RE_ENABLE_I18N
+  /* A node can consume a multibyte character if it is a '.' in a
+     multibyte locale or a complex bracket expression.  */
+  dfa->nodes[dfa->nodes_len].accept_mb =
+    (token.type == OP_PERIOD && dfa->mb_cur_max > 1)
+    || token.type == COMPLEX_BRACKET;
+#endif
+  dfa->nexts[dfa->nodes_len] = -1;
+  re_node_set_init_empty (dfa->edests + dfa->nodes_len);
+  re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
+  return dfa->nodes_len++;
+}
+
+/* Compute the hash of a DFA state from its node set NODES and its
+   CONTEXT: the element count, plus CONTEXT, plus the sum of all node
+   indices, in unsigned (wrapping) arithmetic.  */
+static inline unsigned int
+internal_function
+calc_state_hash (const re_node_set *nodes, unsigned int context)
+{
+  unsigned int sum = nodes->nelem + context;
+  int pos = nodes->nelem;
+
+  /* Unsigned addition wraps and is order-independent, so the elements
+     can be accumulated from the back.  */
+  while (pos-- > 0)
+    sum += nodes->elems[pos];
+
+  return sum;
+}
+
+/* Return the context-independent DFA state whose node set equals NODES.
+   An existing state is looked up in DFA's state hash table; if there is
+   none, a new state is created (and registered) and returned.
+   Notes:
+   - NULL stands for "the invalid state": for an empty NODES the function
+     returns NULL with *ERR set to REG_NOERROR.
+   - When creating the state fails, NULL is returned and *ERR is set to
+     REG_ESPACE.
+   - *ERR is written only in those two cases; it is left untouched when a
+     state is returned.  */
+
+static re_dfastate_t *
+internal_function
+re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa,
+                  const re_node_set *nodes)
+{
+  struct re_state_table_entry *bucket;
+  re_dfastate_t *created;
+  unsigned int key;
+  int slot;
+
+  if (BE (nodes->nelem == 0, 0))
+    {
+      *err = REG_NOERROR;
+      return NULL;
+    }
+
+  key = calc_state_hash (nodes, 0);
+  bucket = &dfa->state_table[key & dfa->state_hash_mask];
+
+  /* Scan the bucket; the stored hash is a cheap filter before the full
+     node-set comparison.  */
+  for (slot = 0; slot < bucket->num; ++slot)
+    {
+      re_dfastate_t *candidate = bucket->array[slot];
+      if (candidate->hash == key
+          && re_node_set_compare (&candidate->nodes, nodes))
+        return candidate;
+    }
+
+  /* No matching state exists in the DFA yet; build it.  */
+  created = create_ci_newstate (dfa, nodes, key);
+  if (BE (created == NULL, 0))
+    *err = REG_ESPACE;
+
+  return created;
+}
+
+/* Return the DFA state whose entrance node set equals NODES and whose
+   context equals CONTEXT.  An existing state is looked up in DFA's state
+   hash table; if there is none, a new context-dependent state is created
+   (and registered) and returned.
+   Notes:
+   - NULL stands for "the invalid state": for an empty NODES the function
+     returns NULL with *ERR set to REG_NOERROR.
+   - When creating the state fails, NULL is returned and *ERR is set to
+     REG_ESPACE.
+   - *ERR is written only in those two cases; it is left untouched when a
+     state is returned.  */
+
+static re_dfastate_t *
+internal_function
+re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa,
+                          const re_node_set *nodes, unsigned int context)
+{
+  struct re_state_table_entry *bucket;
+  re_dfastate_t *created;
+  unsigned int key;
+  int slot;
+
+  if (nodes->nelem == 0)
+    {
+      *err = REG_NOERROR;
+      return NULL;
+    }
+
+  key = calc_state_hash (nodes, context);
+  bucket = &dfa->state_table[key & dfa->state_hash_mask];
+
+  for (slot = 0; slot < bucket->num; ++slot)
+    {
+      re_dfastate_t *candidate = bucket->array[slot];
+
+      /* Cheap rejections first, full set comparison last.  */
+      if (candidate->hash != key || candidate->context != context)
+        continue;
+      if (re_node_set_compare (candidate->entrance_nodes, nodes))
+        return candidate;
+    }
+
+  /* No matching state exists in `dfa' yet; build it.  */
+  created = create_cd_newstate (dfa, nodes, context, key);
+  if (BE (created == NULL, 0))
+    *err = REG_ESPACE;
+
+  return created;
+}
+
+/* Finish initialization of the new state NEWSTATE and, using its hash
+   value HASH, put it in the appropriate bucket of DFA's state table.
+
+   This records HASH in NEWSTATE, builds NEWSTATE->non_eps_nodes (the
+   members of NEWSTATE->nodes that are not epsilon nodes) and appends
+   NEWSTATE to the bucket, growing the bucket array if necessary.
+
+   Returns REG_NOERROR on success and REG_ESPACE if any allocation fails.
+   On failure NEWSTATE has not been added to the table; whatever was
+   allocated inside NEWSTATE is left for the caller to release (the
+   callers in this file do so with free_state).  */
+
+static reg_errcode_t
+register_state (const re_dfa_t *dfa, re_dfastate_t *newstate,
+                unsigned int hash)
+{
+  struct re_state_table_entry *spot;
+  reg_errcode_t err;
+  int i;
+
+  newstate->hash = hash;
+  err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
+  if (BE (err != REG_NOERROR, 0))
+    return REG_ESPACE;
+  for (i = 0; i < newstate->nodes.nelem; i++)
+    {
+      int elem = newstate->nodes.elems[i];
+      /* nodes.elems is walked in order, so appending keeps non_eps_nodes
+         sorted.  The set was sized for nodes.nelem entries above, so the
+         append is not expected to reallocate, but do not silently drop a
+         node if it ever does and fails.  */
+      if (!IS_EPSILON_NODE (dfa->nodes[elem].type)
+          && BE (re_node_set_insert_last (&newstate->non_eps_nodes, elem) < 0,
+                 0))
+        return REG_ESPACE;
+    }
+
+  spot = dfa->state_table + (hash & dfa->state_hash_mask);
+  if (BE (spot->alloc <= spot->num, 0))
+    {
+      /* "+ 2" lets a still-empty bucket grow as well.  */
+      int new_alloc = 2 * spot->num + 2;
+      re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *,
+                                              new_alloc);
+      if (BE (new_array == NULL, 0))
+        return REG_ESPACE;
+      spot->array = new_array;
+      spot->alloc = new_alloc;
+    }
+  spot->array[spot->num++] = newstate;
+  return REG_NOERROR;
+}
+
+/* Release STATE together with everything it owns: its node sets, its
+   separately allocated entrance node set (if any), both transition
+   tables and the state object itself.
+
+   entrance_nodes either aliases STATE->nodes (nothing extra to free) or
+   points to a separately allocated set.  It may also be NULL when the
+   state is torn down after the allocation of that set failed (see the
+   error path in create_cd_newstate), so NULL is tolerated here instead
+   of being dereferenced.  */
+static void
+free_state (re_dfastate_t *state)
+{
+  re_node_set_free (&state->non_eps_nodes);
+  re_node_set_free (&state->inveclosure);
+  if (state->entrance_nodes != NULL
+      && state->entrance_nodes != &state->nodes)
+    {
+      re_node_set_free (state->entrance_nodes);
+      re_free (state->entrance_nodes);
+    }
+  re_node_set_free (&state->nodes);
+  re_free (state->word_trtable);
+  re_free (state->trtable);
+  re_free (state);
+}
+
+/* Create a new state that is independent of contexts, holding a copy of
+   the node set NODES, and register it in DFA's state table under HASH.
+   Returns the new state on success, or NULL if memory is exhausted (in
+   which case everything allocated here has been released again).
+
+   The state's flags are derived from its nodes: `halt' if it contains
+   END_OF_RE, `has_backref' if it contains OP_BACK_REF, `has_constraint'
+   if it contains an ANCHOR or a node carrying a constraint, and (with
+   RE_ENABLE_I18N) `accept_mb' if any considered node accepts multibyte
+   characters.  */
+
+static re_dfastate_t *
+internal_function
+create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
+                    unsigned int hash)
+{
+  int i;
+  reg_errcode_t err;
+  re_dfastate_t *newstate;
+
+  /* calloc takes (element count, element size).  Zero-filling gives all
+     flags, sets and table pointers their "empty" value.  */
+  newstate = (re_dfastate_t *) calloc (1, sizeof (re_dfastate_t));
+  if (BE (newstate == NULL, 0))
+    return NULL;
+  err = re_node_set_init_copy (&newstate->nodes, nodes);
+  if (BE (err != REG_NOERROR, 0))
+    {
+      re_free (newstate);
+      return NULL;
+    }
+
+  /* A context-independent state is entered with exactly its own nodes.  */
+  newstate->entrance_nodes = &newstate->nodes;
+  for (i = 0 ; i < nodes->nelem ; i++)
+    {
+      re_token_t *node = dfa->nodes + nodes->elems[i];
+      re_token_type_t type = node->type;
+      /* Plain characters without a constraint affect none of the flags.  */
+      if (type == CHARACTER && !node->constraint)
+        continue;
+#ifdef RE_ENABLE_I18N
+      newstate->accept_mb |= node->accept_mb;
+#endif /* RE_ENABLE_I18N */
+
+      /* If the state has the halt node, the state is a halt state.  */
+      if (type == END_OF_RE)
+        newstate->halt = 1;
+      else if (type == OP_BACK_REF)
+        newstate->has_backref = 1;
+      else if (type == ANCHOR || node->constraint)
+        newstate->has_constraint = 1;
+    }
+  err = register_state (dfa, newstate, hash);
+  if (BE (err != REG_NOERROR, 0))
+    {
+      free_state (newstate);
+      newstate = NULL;
+    }
+  return newstate;
+}
+
+/* Create a new state that depends on the context CONTEXT, starting from
+   the node set NODES, and register it in DFA's state table under HASH.
+   Returns the new state on success, or NULL if memory is exhausted (in
+   which case everything allocated here has been released again).
+
+   If any node carries a constraint (or is an ANCHOR), the state gets a
+   separately allocated `entrance_nodes' set holding the full NODES, and
+   every node whose constraint is not satisfied by CONTEXT is removed
+   from the state's own `nodes' set.  Otherwise `entrance_nodes' simply
+   aliases `nodes'.  */
+
+static re_dfastate_t *
+internal_function
+create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
+                    unsigned int context, unsigned int hash)
+{
+  int i, nctx_nodes = 0;
+  reg_errcode_t err;
+  re_dfastate_t *newstate;
+
+  /* calloc takes (element count, element size).  */
+  newstate = (re_dfastate_t *) calloc (1, sizeof (re_dfastate_t));
+  if (BE (newstate == NULL, 0))
+    return NULL;
+  err = re_node_set_init_copy (&newstate->nodes, nodes);
+  if (BE (err != REG_NOERROR, 0))
+    {
+      re_free (newstate);
+      return NULL;
+    }
+
+  newstate->context = context;
+  newstate->entrance_nodes = &newstate->nodes;
+
+  for (i = 0 ; i < nodes->nelem ; i++)
+    {
+      re_token_t *node = dfa->nodes + nodes->elems[i];
+      re_token_type_t type = node->type;
+      unsigned int constraint = node->constraint;
+
+      /* Plain characters without a constraint need no special care.  */
+      if (type == CHARACTER && !constraint)
+        continue;
+#ifdef RE_ENABLE_I18N
+      newstate->accept_mb |= node->accept_mb;
+#endif /* RE_ENABLE_I18N */
+
+      /* If the state has the halt node, the state is a halt state.  */
+      if (type == END_OF_RE)
+        newstate->halt = 1;
+      else if (type == OP_BACK_REF)
+        newstate->has_backref = 1;
+      else if (type == ANCHOR)
+        constraint = node->opr.ctx_type;
+
+      if (constraint)
+        {
+          if (newstate->entrance_nodes == &newstate->nodes)
+            {
+              /* First constrained node: give the state its own copy of
+                 the complete entrance set.  The copy is attached to
+                 NEWSTATE only once it is fully built, so free_state
+                 never sees a NULL or half-initialized entrance_nodes.  */
+              re_node_set *entrance_nodes = re_malloc (re_node_set, 1);
+              if (BE (entrance_nodes == NULL, 0))
+                {
+                  free_state (newstate);
+                  return NULL;
+                }
+              if (BE (re_node_set_init_copy (entrance_nodes, nodes)
+                      != REG_NOERROR, 0))
+                {
+                  re_free (entrance_nodes);
+                  free_state (newstate);
+                  return NULL;
+                }
+              newstate->entrance_nodes = entrance_nodes;
+              newstate->has_constraint = 1;
+            }
+
+          if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
+            {
+              /* I indexes NODES; newstate->nodes has already lost
+                 NCTX_NODES earlier entries, hence the adjustment.  */
+              re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
+              ++nctx_nodes;
+            }
+        }
+    }
+  err = register_state (dfa, newstate, hash);
+  if (BE (err != REG_NOERROR, 0))
+    {
+      free_state (newstate);
+      newstate = NULL;
+    }
+  return  newstate;
+}
+
+/******************************************************************************/
+/******************************************************************************/
+/******************************************************************************/
+/* GKINCLUDE #include "regcomp.c" */
+/******************************************************************************/
+/******************************************************************************/
+/******************************************************************************/
+/* Extended regular expression matching and search library.
+   Copyright (C) 2002,2003,2004,2005,2006 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern,
+					  size_t length, reg_syntax_t syntax);
+static void re_compile_fastmap_iter (regex_t *bufp,
+				     const re_dfastate_t *init_state,
+				     char *fastmap);
+static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len);
+#ifdef RE_ENABLE_I18N
+static void free_charset (re_charset_t *cset);
+#endif /* RE_ENABLE_I18N */
+static void free_workarea_compile (regex_t *preg);
+static reg_errcode_t create_initial_state (re_dfa_t *dfa);
+#ifdef RE_ENABLE_I18N
+static void optimize_utf8 (re_dfa_t *dfa);
+#endif
+static reg_errcode_t analyze (regex_t *preg);
+static reg_errcode_t preorder (bin_tree_t *root,
+			       reg_errcode_t (fn (void *, bin_tree_t *)),
+			       void *extra);
+static reg_errcode_t postorder (bin_tree_t *root,
+				reg_errcode_t (fn (void *, bin_tree_t *)),
+				void *extra);
+static reg_errcode_t optimize_subexps (void *extra, bin_tree_t *node);
+static reg_errcode_t lower_subexps (void *extra, bin_tree_t *node);
+static bin_tree_t *lower_subexp (reg_errcode_t *err, regex_t *preg,
+				 bin_tree_t *node);
+static reg_errcode_t calc_first (void *extra, bin_tree_t *node);
+static reg_errcode_t calc_next (void *extra, bin_tree_t *node);
+static reg_errcode_t link_nfa_nodes (void *extra, bin_tree_t *node);
+static int duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint);
+static int search_duplicated_node (const re_dfa_t *dfa, int org_node,
+				   unsigned int constraint);
+static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
+static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa,
+					 int node, int root);
+static reg_errcode_t calc_inveclosure (re_dfa_t *dfa);
+static int fetch_number (re_string_t *input, re_token_t *token,
+			 reg_syntax_t syntax);
+static int peek_token (re_token_t *token, re_string_t *input,
+			reg_syntax_t syntax) internal_function;
+static bin_tree_t *parse (re_string_t *regexp, regex_t *preg,
+			  reg_syntax_t syntax, reg_errcode_t *err);
+static bin_tree_t *parse_reg_exp (re_string_t *regexp, regex_t *preg,
+				  re_token_t *token, reg_syntax_t syntax,
+				  int nest, reg_errcode_t *err);
+static bin_tree_t *parse_branch (re_string_t *regexp, regex_t *preg,
+				 re_token_t *token, reg_syntax_t syntax,
+				 int nest, reg_errcode_t *err);
+static bin_tree_t *parse_expression (re_string_t *regexp, regex_t *preg,
+				     re_token_t *token, reg_syntax_t syntax,
+				     int nest, reg_errcode_t *err);
+static bin_tree_t *parse_sub_exp (re_string_t *regexp, regex_t *preg,
+				  re_token_t *token, reg_syntax_t syntax,
+				  int nest, reg_errcode_t *err);
+static bin_tree_t *parse_dup_op (bin_tree_t *dup_elem, re_string_t *regexp,
+				 re_dfa_t *dfa, re_token_t *token,
+				 reg_syntax_t syntax, reg_errcode_t *err);
+static bin_tree_t *parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa,
+				      re_token_t *token, reg_syntax_t syntax,
+				      reg_errcode_t *err);
+static reg_errcode_t parse_bracket_element (bracket_elem_t *elem,
+					    re_string_t *regexp,
+					    re_token_t *token, int token_len,
+					    re_dfa_t *dfa,
+					    reg_syntax_t syntax,
+					    int accept_hyphen);
+static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem,
+					  re_string_t *regexp,
+					  re_token_t *token);
+#ifdef RE_ENABLE_I18N
+static reg_errcode_t build_equiv_class (bitset_t sbcset,
+					re_charset_t *mbcset,
+					int *equiv_class_alloc,
+					const unsigned char *name);
+static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
+				      bitset_t sbcset,
+				      re_charset_t *mbcset,
+				      int *char_class_alloc,
+				      const unsigned char *class_name,
+				      reg_syntax_t syntax);
+#else  /* not RE_ENABLE_I18N */
+static reg_errcode_t build_equiv_class (bitset_t sbcset,
+					const unsigned char *name);
+static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
+				      bitset_t sbcset,
+				      const unsigned char *class_name,
+				      reg_syntax_t syntax);
+#endif /* not RE_ENABLE_I18N */
+static bin_tree_t *build_charclass_op (re_dfa_t *dfa,
+				       RE_TRANSLATE_TYPE trans,
+				       const unsigned char *class_name,
+				       const unsigned char *extra,
+				       int non_match, reg_errcode_t *err);
+static bin_tree_t *create_tree (re_dfa_t *dfa,
+				bin_tree_t *left, bin_tree_t *right,
+				re_token_type_t type);
+static bin_tree_t *create_token_tree (re_dfa_t *dfa,
+				      bin_tree_t *left, bin_tree_t *right,
+				      const re_token_t *token);
+static bin_tree_t *duplicate_tree (const bin_tree_t *src, re_dfa_t *dfa);
+static void free_token (re_token_t *node);
+static reg_errcode_t free_tree (void *extra, bin_tree_t *node);
+static reg_errcode_t mark_opt_subexp (void *extra, bin_tree_t *node);
+
+/* This table gives an error message for each of the error codes listed
+   in regex.h.  Obviously the order here has to be same as there.
+   POSIX doesn't require that we do anything for REG_NOERROR,
+   but why not be nice?
+
+   Layout: all messages are concatenated into ONE char array, separated
+   by explicit "\0" literals; the last message relies on the implicit
+   terminator of the string literal.  Each REG_xxx_IDX macro is the byte
+   offset of its message inside that array: the previous offset plus
+   `sizeof' of the previous message, which already counts that message's
+   NUL separator.  The text inside every `sizeof "..."' must therefore be
+   kept identical to the message it measures, otherwise all following
+   offsets are wrong.  */
+
+const char __re_error_msgid[] attribute_hidden =
+  {
+#define REG_NOERROR_IDX	0
+    gettext_noop ("Success")	/* REG_NOERROR */
+    "\0"
+#define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
+    gettext_noop ("No match")	/* REG_NOMATCH */
+    "\0"
+#define REG_BADPAT_IDX	(REG_NOMATCH_IDX + sizeof "No match")
+    gettext_noop ("Invalid regular expression") /* REG_BADPAT */
+    "\0"
+#define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
+    gettext_noop ("Invalid collation character") /* REG_ECOLLATE */
+    "\0"
+#define REG_ECTYPE_IDX	(REG_ECOLLATE_IDX + sizeof "Invalid collation character")
+    gettext_noop ("Invalid character class name") /* REG_ECTYPE */
+    "\0"
+#define REG_EESCAPE_IDX	(REG_ECTYPE_IDX + sizeof "Invalid character class name")
+    gettext_noop ("Trailing backslash") /* REG_EESCAPE */
+    "\0"
+#define REG_ESUBREG_IDX	(REG_EESCAPE_IDX + sizeof "Trailing backslash")
+    gettext_noop ("Invalid back reference") /* REG_ESUBREG */
+    "\0"
+#define REG_EBRACK_IDX	(REG_ESUBREG_IDX + sizeof "Invalid back reference")
+    gettext_noop ("Unmatched [ or [^")	/* REG_EBRACK */
+    "\0"
+#define REG_EPAREN_IDX	(REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
+    gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */
+    "\0"
+#define REG_EBRACE_IDX	(REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
+    gettext_noop ("Unmatched \\{") /* REG_EBRACE */
+    "\0"
+#define REG_BADBR_IDX	(REG_EBRACE_IDX + sizeof "Unmatched \\{")
+    gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */
+    "\0"
+#define REG_ERANGE_IDX	(REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
+    gettext_noop ("Invalid range end")	/* REG_ERANGE */
+    "\0"
+#define REG_ESPACE_IDX	(REG_ERANGE_IDX + sizeof "Invalid range end")
+    gettext_noop ("Memory exhausted") /* REG_ESPACE */
+    "\0"
+#define REG_BADRPT_IDX	(REG_ESPACE_IDX + sizeof "Memory exhausted")
+    gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */
+    "\0"
+#define REG_EEND_IDX	(REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
+    gettext_noop ("Premature end of regular expression") /* REG_EEND */
+    "\0"
+#define REG_ESIZE_IDX	(REG_EEND_IDX + sizeof "Premature end of regular expression")
+    gettext_noop ("Regular expression too big") /* REG_ESIZE */
+    "\0"
+#define REG_ERPAREN_IDX	(REG_ESIZE_IDX + sizeof "Regular expression too big")
+    gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
+  };
+
+/* Byte offsets into __re_error_msgid, one entry per message and in the
+   same order as the messages are laid out there.  re_compile_pattern
+   indexes this table with the numeric error code to find the message
+   text, so the entries must stay in error-code order.  */
+const size_t __re_error_msgid_idx[] attribute_hidden =
+  {
+    REG_NOERROR_IDX,
+    REG_NOMATCH_IDX,
+    REG_BADPAT_IDX,
+    REG_ECOLLATE_IDX,
+    REG_ECTYPE_IDX,
+    REG_EESCAPE_IDX,
+    REG_ESUBREG_IDX,
+    REG_EBRACK_IDX,
+    REG_EPAREN_IDX,
+    REG_EBRACE_IDX,
+    REG_BADBR_IDX,
+    REG_ERANGE_IDX,
+    REG_ESPACE_IDX,
+    REG_BADRPT_IDX,
+    REG_EEND_IDX,
+    REG_ESIZE_IDX,
+    REG_ERPAREN_IDX
+  };
+
+/* Entry points for GNU code.  */
+
+/* re_compile_pattern is the GNU regular expression compiler: it
+   compiles PATTERN (of length LENGTH) using the global syntax
+   re_syntax_options and puts the result in BUFP.
+   Returns NULL if the pattern was valid, otherwise a pointer to the
+   (possibly translated) error message for the failure.
+
+   Assumes the `allocated' (and perhaps `buffer') and `translate' fields
+   are set in BUFP on entry.
+
+   The definition uses a prototype rather than an old-style (K&R)
+   parameter list: old-style definitions were removed from the language
+   in C23 and are rejected by compilers defaulting to that standard.  */
+
+const char *
+re_compile_pattern (const char *pattern, size_t length,
+                    struct re_pattern_buffer *bufp)
+{
+  reg_errcode_t ret;
+
+  /* And GNU code determines whether or not to get register information
+     by passing null for the REGS argument to re_match, etc., not by
+     setting no_sub, unless RE_NO_SUB is set.  */
+  bufp->no_sub = !!(re_syntax_options & RE_NO_SUB);
+
+  /* Match anchors at newline.  */
+  bufp->newline_anchor = 1;
+
+  ret = re_compile_internal (bufp, pattern, length, re_syntax_options);
+
+  if (!ret)
+    return NULL;
+  /* Map the error code to its offset in the packed message table.  */
+  return gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
+}
+#ifdef _LIBC
+weak_alias (__re_compile_pattern, re_compile_pattern)
+#endif
+
+/* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
+   also be assigned to arbitrarily: each pattern buffer stores its own
+   syntax, so it can be changed between regex compilations.
+   re_compile_pattern reads this variable on every call.  */
+/* This has no initializer because initialized variables in Emacs
+   become read-only after dumping.  As an object with static storage
+   duration it still starts out as zero.  */
+reg_syntax_t re_syntax_options;
+
+
+/* Specify the precise syntax of regexps for compilation.  This provides
+   for compatibility for various utilities which historically have
+   different, incompatible syntaxes.
+
+   The argument SYNTAX is a bit mask comprised of the various bits
+   defined in regex.h.  It is stored in the global re_syntax_options and
+   the previous value of that variable is returned.
+
+   The definition uses a prototype rather than an old-style (K&R)
+   parameter list, which C23 no longer accepts.  */
+
+reg_syntax_t
+re_set_syntax (reg_syntax_t syntax)
+{
+  reg_syntax_t ret = re_syntax_options;
+
+  re_syntax_options = syntax;
+  return ret;
+}
+#ifdef _LIBC
+weak_alias (__re_set_syntax, re_set_syntax)
+#endif
+
+/* Compute the fastmap of the compiled pattern BUFP: a table of SBC_MAX
+   entries, indexed by byte value, in which a nonzero entry marks a byte
+   that can start a match.
+
+   The table bufp->fastmap is cleared and then filled from the DFA's
+   initial state and from each of its context-specific variants (word,
+   newline, beginning-of-buffer) that differs from it.  Afterwards
+   bufp->fastmap_accurate is set.  Always returns 0.
+
+   BUFP must hold a compiled pattern and bufp->fastmap must point to at
+   least SBC_MAX bytes; neither is checked here.
+
+   The definition uses a prototype rather than an old-style (K&R)
+   parameter list, which C23 no longer accepts.  */
+int
+re_compile_fastmap (struct re_pattern_buffer *bufp)
+{
+  re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
+  char *fastmap = bufp->fastmap;
+
+  memset (fastmap, '\0', sizeof (char) * SBC_MAX);
+  re_compile_fastmap_iter (bufp, dfa->init_state, fastmap);
+  /* The variants often share the plain initial state; skip duplicates.  */
+  if (dfa->init_state != dfa->init_state_word)
+    re_compile_fastmap_iter (bufp, dfa->init_state_word, fastmap);
+  if (dfa->init_state != dfa->init_state_nl)
+    re_compile_fastmap_iter (bufp, dfa->init_state_nl, fastmap);
+  if (dfa->init_state != dfa->init_state_begbuf)
+    re_compile_fastmap_iter (bufp, dfa->init_state_begbuf, fastmap);
+  bufp->fastmap_accurate = 1;
+  return 0;
+}
+#ifdef _LIBC
+weak_alias (__re_compile_fastmap, re_compile_fastmap)
+#endif
+
+/* Mark the byte CH in FASTMAP.  When ICASE is nonzero, the entry for
+   tolower (CH) is marked as well.  */
+static inline void
+__attribute ((always_inline))
+re_set_fastmap (char *fastmap, int icase, int ch)
+{
+  fastmap[ch] = 1;
+  if (!icase)
+    return;
+  fastmap[tolower (ch)] = 1;
+}
+
+/* Helper function for re_compile_fastmap.
+   Compile fastmap for the initial_state INIT_STATE.
+
+   Every node in INIT_STATE->nodes is inspected and the bytes with which
+   it can begin a match are marked in FASTMAP (entries are only ever set
+   here, never cleared):
+   - CHARACTER: the character's byte (plus case variants when ignoring
+     case);
+   - SIMPLE_BRACKET: every byte whose bit is set in the bracket's bitset;
+   - COMPLEX_BRACKET (RE_ENABLE_I18N only): the first bytes of the
+     multibyte characters it lists, plus a conservative set of bytes for
+     its other contents;
+   - OP_PERIOD / OP_UTF8_PERIOD / END_OF_RE: every byte; the function
+     then returns at once, and for END_OF_RE it also sets
+     BUFP->can_be_null.
+   Other node types do not touch FASTMAP.  */
+
+static void
+re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
+			 char *fastmap)
+{
+  re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
+  int node_cnt;
+  /* Byte-wise case folding (via re_set_fastmap) is only used when
+     dfa->mb_cur_max is 1; the multibyte case is handled separately
+     below.  */
+  int icase = (dfa->mb_cur_max == 1 && (bufp->syntax & RE_ICASE));
+  for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt)
+    {
+      int node = init_state->nodes.elems[node_cnt];
+      re_token_type_t type = dfa->nodes[node].type;
+
+      if (type == CHARACTER)
+	{
+	  re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c);
+#ifdef RE_ENABLE_I18N
+	  if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
+	    {
+	      /* NOTE(review): alloca memory is only released when the
+		 function returns, so this allocates once per such
+		 CHARACTER node visited by the loop.  */
+	      unsigned char *buf = alloca (dfa->mb_cur_max), *p;
+	      wchar_t wc;
+	      mbstate_t state;
+
+	      /* Gather this byte and the following `mb_partial'
+		 CHARACTER nodes into BUF, convert that to a wide
+		 character, and mark the first byte of its lower-case
+		 encoding.  `++node' only advances the local copy of the
+		 node index, not the outer loop counter.  */
+	      p = buf;
+	      *p++ = dfa->nodes[node].opr.c;
+	      while (++node < dfa->nodes_len
+		     &&	dfa->nodes[node].type == CHARACTER
+		     && dfa->nodes[node].mb_partial)
+		*p++ = dfa->nodes[node].opr.c;
+	      memset (&state, '\0', sizeof (state));
+	      if (mbrtowc (&wc, (const char *) buf, p - buf,
+			   &state) == p - buf
+		  && (__wcrtomb ((char *) buf, towlower (wc), &state)
+		      != (size_t) -1))
+		re_set_fastmap (fastmap, 0, buf[0]);
+	    }
+#endif
+	}
+      else if (type == SIMPLE_BRACKET)
+	{
+	  int i, ch;
+	  /* Walk the bitset word by word and bit by bit; CH counts the
+	     byte value that corresponds to the current bit.  */
+	  for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
+	    {
+	      int j;
+	      bitset_word_t w = dfa->nodes[node].opr.sbcset[i];
+	      for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
+		if (w & ((bitset_word_t) 1 << j))
+		  re_set_fastmap (fastmap, icase, ch);
+	    }
+	}
+#ifdef RE_ENABLE_I18N
+      else if (type == COMPLEX_BRACKET)
+	{
+	  int i;
+	  re_charset_t *cset = dfa->nodes[node].opr.mbcset;
+	  /* The bracket contains something other than a plain list of
+	     multibyte characters (negation, collating symbols,
+	     equivalence classes, ranges or character classes).  */
+	  if (cset->non_match || cset->ncoll_syms || cset->nequiv_classes
+	      || cset->nranges || cset->nchar_classes)
+	    {
+# ifdef _LIBC
+	      if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0)
+		{
+		  /* In this case we want to catch the bytes which are
+		     the first byte of any collation elements.
+		     e.g. In da_DK, we want to catch 'a' since "aa"
+			  is a valid collation element, and don't catch
+			  'b' since 'b' is the only collation element
+			  which starts from 'b'.  */
+		  const int32_t *table = (const int32_t *)
+		    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
+		  for (i = 0; i < SBC_MAX; ++i)
+		    if (table[i] < 0)
+		      re_set_fastmap (fastmap, icase, i);
+		}
+# else
+	      /* Outside glibc: mark every byte that __btowc cannot
+		 convert on its own (it yields WEOF for those).  */
+	      if (dfa->mb_cur_max > 1)
+		for (i = 0; i < SBC_MAX; ++i)
+		  if (__btowc (i) == WEOF)
+		    re_set_fastmap (fastmap, icase, i);
+# endif /* not _LIBC */
+	    }
+	  /* Mark the first byte of every multibyte character listed
+	     explicitly in the bracket, and of its lower-case form when
+	     ignoring case in a multibyte locale.  */
+	  for (i = 0; i < cset->nmbchars; ++i)
+	    {
+	      char buf[256];
+	      mbstate_t state;
+	      memset (&state, '\0', sizeof (state));
+	      if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1)
+		re_set_fastmap (fastmap, icase, *(unsigned char *) buf);
+	      if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
+		{
+		  if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state)
+		      != (size_t) -1)
+		    re_set_fastmap (fastmap, 0, *(unsigned char *) buf);
+		}
+	    }
+	}
+#endif /* RE_ENABLE_I18N */
+      else if (type == OP_PERIOD
+#ifdef RE_ENABLE_I18N
+	       || type == OP_UTF8_PERIOD
+#endif /* RE_ENABLE_I18N */
+	       || type == END_OF_RE)
+	{
+	  /* Any byte may start a match here: set the whole map and stop,
+	     since no further node can add anything.  */
+	  memset (fastmap, '\1', sizeof (char) * SBC_MAX);
+	  if (type == END_OF_RE)
+	    bufp->can_be_null = 1;
+	  return;
+	}
+    }
+}
+
+/* Entry point for POSIX code.  */
+/* regcomp takes a regular expression as a string and compiles it.
+
+   PREG is a regex_t *.  We do not expect any fields to be initialized,
+   since POSIX says we shouldn't.  Thus, we set
+
+     `buffer' to the compiled pattern;
+     `used' to the length of the compiled pattern;
+     `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
+       REG_EXTENDED bit in CFLAGS is set; otherwise, to
+       RE_SYNTAX_POSIX_BASIC;
+     `newline_anchor' to REG_NEWLINE being set in CFLAGS;
+     `fastmap' to an allocated space for the fastmap;
+     `fastmap_accurate' to zero;
+     `re_nsub' to the number of subexpressions in PATTERN.
+
+   PATTERN is the address of the pattern string.
+
+   CFLAGS is a series of bits which affect compilation.
+
+     If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
+     use POSIX basic syntax.
+
+     If REG_NEWLINE is set, then . and [^...] don't match newline.
+     Also, regexec will try a match beginning after every newline.
+
+     If REG_ICASE is set, then we consider upper- and lowercase
+     versions of letters to be equivalent when matching.
+
+     If REG_NOSUB is set, then when PREG is passed to regexec, that
+     routine will report only success or failure, and nothing about the
+     registers.
+
+   It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
+   the return codes and their meanings.)  */
+
+int
+regcomp (preg, pattern, cflags)
+    regex_t *__restrict preg;
+    const char *__restrict pattern;
+    int cflags;
+{
+  reg_errcode_t status;
+  reg_syntax_t syn;
+
+  /* Start from the POSIX grammar selected by REG_EXTENDED.  */
+  if (cflags & REG_EXTENDED)
+    syn = RE_SYNTAX_POSIX_EXTENDED;
+  else
+    syn = RE_SYNTAX_POSIX_BASIC;
+
+  /* The caller's regex_t is not assumed to be initialized, so reset the
+     fields that the internal compiler reads before writing.  */
+  preg->buffer = NULL;
+  preg->allocated = 0;
+  preg->used = 0;
+
+  /* The fastmap is allocated up front; without it we give up early.  */
+  preg->fastmap = re_malloc (char, SBC_MAX);
+  if (BE (preg->fastmap == NULL, 0))
+    return REG_ESPACE;
+
+  if (cflags & REG_ICASE)
+    syn |= RE_ICASE;
+
+  if (!(cflags & REG_NEWLINE))
+    preg->newline_anchor = 0;
+  else
+    {
+      /* With REG_NEWLINE neither `.' nor `[^...]' may match a newline,
+	 and anchors additionally match at line boundaries.  */
+      syn &= ~RE_DOT_NEWLINE;
+      syn |= RE_HAT_LISTS_NOT_NEWLINE;
+      preg->newline_anchor = 1;
+    }
+  preg->no_sub = (cflags & REG_NOSUB) != 0;
+  preg->translate = NULL;
+
+  status = re_compile_internal (preg, pattern, strlen (pattern), syn);
+
+  /* POSIX has a single code for unbalanced parentheses, whichever side
+     is missing.  */
+  if (status == REG_ERPAREN)
+    status = REG_EPAREN;
+
+  if (BE (status != REG_NOERROR, 0))
+    {
+      /* Compilation failed: do not leave a dangling fastmap behind.  */
+      re_free (preg->fastmap);
+      preg->fastmap = NULL;
+    }
+  else
+    /* preg->fastmap is known to be non-NULL here.  Build the fastmap
+       now; the result of re_compile_fastmap is deliberately ignored.  */
+    (void) re_compile_fastmap (preg);
+
+  return (int) status;
+}
+#ifdef _LIBC
+weak_alias (__regcomp, regcomp)
+#endif
+
+/* Returns a message corresponding to an error code, ERRCODE, returned
+   from either regcomp or regexec.   We don't use PREG here.  */
+
+/* regerror ( int errcode, preg, errbuf, errbuf_size) */
+size_t
+regerror (
+    int errcode,
+    const regex_t *__restrict preg,
+    char *__restrict errbuf,
+    size_t errbuf_size)
+{
+  /* Number of entries in the message index table.  */
+  const int code_count = (int) (sizeof (__re_error_msgid_idx)
+				/ sizeof (__re_error_msgid_idx[0]));
+  const char *text;
+  size_t needed;
+
+  /* An out-of-range code means a bug in the caller or in this library;
+     abort so that it gets noticed.  */
+  if (BE (errcode < 0 || errcode >= code_count, 0))
+    abort ();
+
+  text = gettext (__re_error_msgid + __re_error_msgid_idx[errcode]);
+
+  /* Size of the full message, terminating null included.  This is the
+     return value even when the message gets truncated or not copied.  */
+  needed = strlen (text) + 1;
+
+  if (BE (errbuf_size == 0, 0))
+    return needed;
+
+  if (BE (needed <= errbuf_size, 1))
+    memcpy (errbuf, text, needed);
+  else
+    {
+      /* Message does not fit: copy what fits and null-terminate.  */
+#if defined HAVE_MEMPCPY || defined _LIBC
+      *((char *) __mempcpy (errbuf, text, errbuf_size - 1)) = '\0';
+#else
+      memcpy (errbuf, text, errbuf_size - 1);
+      errbuf[errbuf_size - 1] = 0;
+#endif
+    }
+
+  return needed;
+}
+#ifdef _LIBC
+weak_alias (__regerror, regerror)
+#endif
+
+
+#ifdef RE_ENABLE_I18N
+/* This static array is used for the map to single-byte characters when
+   UTF-8 is used.  Otherwise we would allocate memory just to initialize
+   it the same all the time.  UTF-8 is the preferred encoding so this is
+   a worthwhile optimization.  */
+static const bitset_t utf8_sb_map =
+{
+  /* Set the first 128 bits, i.e. every bitset word that covers the byte
+     values 0x00-0x7f.  Words not named by the initializer are implicitly
+     zero.  The `[a ... b]' range designator is a GNU C extension.  */
+  [0 ... 0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX
+};
+#endif
+
+
+static void
+free_dfa_content (re_dfa_t *dfa)
+{
+  int n, slot;
+
+  /* Release what each token owns while the node array is still alive.  */
+  if (dfa->nodes != NULL)
+    {
+      for (n = 0; n < dfa->nodes_len; ++n)
+	free_token (&dfa->nodes[n]);
+    }
+  re_free (dfa->nexts);
+
+  /* Any of the per-node set arrays may be missing (NULL); free the
+     elements of those that exist, then the arrays themselves.  */
+  for (n = 0; n < dfa->nodes_len; ++n)
+    {
+      if (dfa->eclosures != NULL)
+	re_node_set_free (&dfa->eclosures[n]);
+      if (dfa->inveclosures != NULL)
+	re_node_set_free (&dfa->inveclosures[n]);
+      if (dfa->edests != NULL)
+	re_node_set_free (&dfa->edests[n]);
+    }
+  re_free (dfa->edests);
+  re_free (dfa->eclosures);
+  re_free (dfa->inveclosures);
+  re_free (dfa->nodes);
+
+  /* The state table has state_hash_mask + 1 buckets; every bucket owns
+     its states as well as the array that lists them.  */
+  if (dfa->state_table != NULL)
+    {
+      for (slot = 0; slot <= dfa->state_hash_mask; ++slot)
+	{
+	  struct re_state_table_entry *bucket = &dfa->state_table[slot];
+	  for (n = 0; n < bucket->num; ++n)
+	    free_state (bucket->array[n]);
+	  re_free (bucket->array);
+	}
+    }
+  re_free (dfa->state_table);
+#ifdef RE_ENABLE_I18N
+  /* utf8_sb_map is a static table shared by all DFAs; never free it.  */
+  if (dfa->sb_char != utf8_sb_map)
+    re_free (dfa->sb_char);
+#endif
+  re_free (dfa->subexp_map);
+#ifdef DEBUG
+  re_free (dfa->re_str);
+#endif
+
+  /* Finally the DFA structure itself; DFA is invalid after this.  */
+  re_free (dfa);
+}
+
+
+/* Free dynamically allocated space used by PREG.  */
+
+void
+regfree (preg)
+    regex_t *preg;
+{
+  re_dfa_t *compiled = (re_dfa_t *) preg->buffer;
+
+  /* Release everything PREG owns ...  */
+  if (BE (compiled != NULL, 1))
+    free_dfa_content (compiled);
+  re_free (preg->fastmap);
+  re_free (preg->translate);
+
+  /* ... and leave no dangling pointers behind, so that PREG can be
+     passed to regfree again or reused for another compilation.  */
+  preg->buffer = NULL;
+  preg->allocated = 0;
+  preg->fastmap = NULL;
+  preg->translate = NULL;
+}
+#ifdef _LIBC
+weak_alias (__regfree, regfree)
+#endif
+
+/* Entry points compatible with 4.2 BSD regex library.  We don't define
+   them unless specifically requested.  */
+
+#if defined _REGEX_RE_COMP || defined _LIBC
+
+/* BSD has one and only one pattern buffer.  */
+static struct re_pattern_buffer re_comp_buf;
+
+char *
+# ifdef _LIBC
+/* Weak in libc, so that POSIX programs may define these names themselves
+   and still link against regcomp/regexec above.  */
+weak_function
+# endif
+re_comp (s)
+     const char *s;
+{
+  reg_errcode_t status;
+
+  /* A null argument means "reuse the previous pattern"; that only
+     works if there is one.  */
+  if (s == NULL)
+    {
+      if (re_comp_buf.buffer != NULL)
+	return 0;
+      return gettext ("No previous regular expression");
+    }
+
+  if (re_comp_buf.buffer != NULL)
+    {
+      /* Throw away the old pattern but hold on to its fastmap: detach
+	 it so that __regfree does not release it, wipe the buffer, then
+	 attach it again.  */
+      char *kept_fastmap = re_comp_buf.fastmap;
+      re_comp_buf.fastmap = NULL;
+      __regfree (&re_comp_buf);
+      memset (&re_comp_buf, '\0', sizeof (re_comp_buf));
+      re_comp_buf.fastmap = kept_fastmap;
+    }
+
+  if (re_comp_buf.fastmap == NULL)
+    {
+      re_comp_buf.fastmap = (char *) malloc (SBC_MAX);
+      if (re_comp_buf.fastmap == NULL)
+	return (char *) gettext (__re_error_msgid
+				 + __re_error_msgid_idx[(int) REG_ESPACE]);
+    }
+
+  /* `re_exec' always passes NULL for the `regs' argument, hence the
+     pattern buffer fields that only matter for registers are left
+     alone.  */
+
+  /* Anchors match at newlines.  */
+  re_comp_buf.newline_anchor = 1;
+
+  status = re_compile_internal (&re_comp_buf, s, strlen (s),
+				re_syntax_options);
+  if (status == REG_NOERROR)
+    return NULL;
+
+  /* The cast discards `const' if !HAVE_LIBINTL.  */
+  return (char *) gettext (__re_error_msgid
+			   + __re_error_msgid_idx[(int) status]);
+}
+
+#ifdef _LIBC
+/* Releases the single BSD pattern buffer by handing it to __regfree.
+   libc_freeres_fn is a macro defined outside this file; presumably it
+   arranges for free_mem to run when libc releases its resources -
+   TODO confirm against the glibc headers.  */
+libc_freeres_fn (free_mem)
+{
+  __regfree (&re_comp_buf);
+}
+#endif
+
+#endif /* _REGEX_RE_COMP */
+
+/* Internal entry point.
+   Compile the regular expression PATTERN, whose length is LENGTH.
+   SYNTAX indicates the regular expression's syntax.  */
+
+static reg_errcode_t
+re_compile_internal (regex_t *preg, const char * pattern, size_t length,
+		     reg_syntax_t syntax)
+{
+  /* Returns REG_NOERROR on success.  On every failure after the DFA has
+     been allocated, the DFA is released and PREG->buffer and
+     PREG->allocated are reset, so PREG never keeps a half-built DFA.  */
+  reg_errcode_t err = REG_NOERROR;
+  re_dfa_t *dfa;
+  re_string_t regexp;
+
+  /* Initialize the pattern buffer.  */
+  preg->fastmap_accurate = 0;
+  preg->syntax = syntax;
+  preg->not_bol = preg->not_eol = 0;
+  preg->used = 0;
+  preg->re_nsub = 0;
+  preg->can_be_null = 0;
+  preg->regs_allocated = REGS_UNALLOCATED;
+
+  /* Initialize the dfa.  */
+  dfa = (re_dfa_t *) preg->buffer;
+  if (BE (preg->allocated < sizeof (re_dfa_t), 0))
+    {
+      /* If zero allocated, but buffer is non-null, try to realloc
+	 enough space.  This loses if buffer's address is bogus, but
+	 that is the user's responsibility.  If ->buffer is NULL this
+	 is a simple allocation.  */
+      dfa = re_realloc (preg->buffer, re_dfa_t, 1);
+      if (dfa == NULL)
+	return REG_ESPACE;
+      preg->allocated = sizeof (re_dfa_t);
+      preg->buffer = (unsigned char *) dfa;
+    }
+  preg->used = sizeof (re_dfa_t);
+
+  err = init_dfa (dfa, length);
+  if (BE (err != REG_NOERROR, 0))
+    {
+      /* free_dfa_content also frees DFA itself, so PREG must be
+	 detached from it.  */
+      free_dfa_content (dfa);
+      preg->buffer = NULL;
+      preg->allocated = 0;
+      return err;
+    }
+#ifdef DEBUG
+  /* Note: length+1 will not overflow since it is checked in init_dfa.  */
+  /* NOTE(review): the result of re_malloc is not checked before the
+     strncpy below; this only affects DEBUG builds.  */
+  dfa->re_str = re_malloc (char, length + 1);
+  strncpy (dfa->re_str, pattern, length + 1);
+#endif
+
+  __libc_lock_init (dfa->lock);
+
+  err = re_string_construct (&regexp, pattern, length, preg->translate,
+			     syntax & RE_ICASE, dfa);
+  if (BE (err != REG_NOERROR, 0))
+    {
+      /* Shared failure exit: the parse and analyze steps below jump back
+	 into this block.  ERR holds the failure code at that point (parse
+	 receives &err as its last argument).  */
+    re_compile_internal_free_return:
+      free_workarea_compile (preg);
+      re_string_destruct (&regexp);
+      free_dfa_content (dfa);
+      preg->buffer = NULL;
+      preg->allocated = 0;
+      return err;
+    }
+
+  /* Parse the regular expression, and build a structure tree.  */
+  preg->re_nsub = 0;
+  dfa->str_tree = parse (&regexp, preg, syntax, &err);
+  if (BE (dfa->str_tree == NULL, 0))
+    goto re_compile_internal_free_return;
+
+  /* Analyze the tree and create the nfa.  */
+  err = analyze (preg);
+  if (BE (err != REG_NOERROR, 0))
+    goto re_compile_internal_free_return;
+
+#ifdef RE_ENABLE_I18N
+  /* If possible, do searching in single byte encoding to speed things up.  */
+  if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL)
+    optimize_utf8 (dfa);
+#endif
+
+  /* Then create the initial state of the dfa.  */
+  err = create_initial_state (dfa);
+
+  /* Release work areas.  */
+  /* This happens whether or not create_initial_state succeeded; ERR is
+     only examined afterwards.  */
+  free_workarea_compile (preg);
+  re_string_destruct (&regexp);
+
+  if (BE (err != REG_NOERROR, 0))
+    {
+      free_dfa_content (dfa);
+      preg->buffer = NULL;
+      preg->allocated = 0;
+    }
+
+  return err;
+}
+
+/* Initialize DFA.  We use the length of the regular expression PAT_LEN
+   as the initial length of some arrays.  */
+
+static reg_errcode_t
+init_dfa (re_dfa_t *dfa, size_t pat_len)
+{
+  /* Zeroes DFA, allocates the node array and the state hash table (both
+     sized from PAT_LEN) and records the locale's multibyte properties.
+     Returns REG_NOERROR, or REG_ESPACE when PAT_LEN is too large or an
+     allocation fails.  On failure, already allocated members are left
+     in DFA for the caller to release.  */
+  unsigned int table_size;
+#ifndef _LIBC
+  char *codeset_name;
+#endif
+
+  memset (dfa, '\0', sizeof (re_dfa_t));
+
+  /* Force allocation of str_tree_storage the first time.  */
+  dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
+
+  /* Avoid overflows:
+     - PAT_LEN + 1 must not wrap around;
+     - the byte size of an array of PAT_LEN + 1 tokens must be
+       representable in size_t;
+     - TABLE_SIZE is an unsigned int that is doubled until it exceeds
+       PAT_LEN, so PAT_LEN must be smaller than the largest power of two
+       an unsigned int can hold.  Otherwise the doubling would wrap to
+       zero and the loop below would never terminate.  */
+  if (pat_len == SIZE_MAX
+      || pat_len >= SIZE_MAX / sizeof (re_token_t)
+      || pat_len > ((unsigned int) -1) / 2)
+    return REG_ESPACE;
+
+  dfa->nodes_alloc = pat_len + 1;
+  dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc);
+
+  /* table_size = smallest power of two greater than pat_len.  */
+  for (table_size = 1; ; table_size <<= 1)
+    if (table_size > pat_len)
+      break;
+
+  dfa->state_table = calloc (sizeof (struct re_state_table_entry), table_size);
+  dfa->state_hash_mask = table_size - 1;
+
+  dfa->mb_cur_max = MB_CUR_MAX;
+#ifdef _LIBC
+  if (dfa->mb_cur_max == 6
+      && strcmp (_NL_CURRENT (LC_CTYPE, _NL_CTYPE_CODESET_NAME), "UTF-8") == 0)
+    dfa->is_utf8 = 1;
+  dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII)
+		       != 0);
+#else
+# ifdef HAVE_LANGINFO_CODESET
+  codeset_name = nl_langinfo (CODESET);
+# else
+  /* Without nl_langinfo, derive the codeset from the environment: take
+     the first non-empty of LC_ALL, LC_CTYPE and LANG, and use what
+     follows its first `.', if any.  */
+  codeset_name = getenv ("LC_ALL");
+  if (codeset_name == NULL || codeset_name[0] == '\0')
+    codeset_name = getenv ("LC_CTYPE");
+  if (codeset_name == NULL || codeset_name[0] == '\0')
+    codeset_name = getenv ("LANG");
+  if (codeset_name == NULL)
+    codeset_name = "";
+  else if (strchr (codeset_name, '.') !=  NULL)
+    codeset_name = strchr (codeset_name, '.') + 1;
+# endif
+
+  if (strcasecmp (codeset_name, "UTF-8") == 0
+      || strcasecmp (codeset_name, "UTF8") == 0)
+    dfa->is_utf8 = 1;
+
+  /* We check exhaustively in the loop below if this charset is a
+     superset of ASCII.  */
+  dfa->map_notascii = 0;
+#endif
+
+#ifdef RE_ENABLE_I18N
+  if (dfa->mb_cur_max > 1)
+    {
+      if (dfa->is_utf8)
+	/* Shared read-only table; free_dfa_content knows not to free it.  */
+	dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map;
+      else
+	{
+	  int i, j, ch;
+
+	  dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
+	  if (BE (dfa->sb_char == NULL, 0))
+	    return REG_ESPACE;
+
+	  /* Set the bits corresponding to single byte chars.  */
+	  for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
+	    for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
+	      {
+		wint_t wch = __btowc (ch);
+		if (wch != WEOF)
+		  dfa->sb_char[i] |= (bitset_word_t) 1 << j;
+# ifndef _LIBC
+		if (isascii (ch) && wch != ch)
+		  dfa->map_notascii = 1;
+# endif
+	      }
+	}
+    }
+#endif
+
+  /* The two allocations made above are checked only now.  */
+  if (BE (dfa->nodes == NULL || dfa->state_table == NULL, 0))
+    return REG_ESPACE;
+  return REG_NOERROR;
+}
+
+/* Initialize the WORD_CHAR table, which indicates which characters are
+   "word" characters.  Here "word" character means a word-constituent
+   character as used by operators like "\<", "\>", etc.  */
+
+static void
+internal_function
+init_word_char (re_dfa_t *dfa)
+{
+  int ch;
+
+  dfa->word_ops_used = 1;
+
+  /* Visit every bit of the bitset in order; bit number CH lives in word
+     CH / BITSET_WORD_BITS at position CH % BITSET_WORD_BITS.  A bit is
+     set for alphanumerics and for the underscore.  */
+  for (ch = 0; ch < (int) (BITSET_WORDS * BITSET_WORD_BITS); ++ch)
+    {
+      if (ch == '_' || isalnum (ch))
+	dfa->word_char[ch / BITSET_WORD_BITS]
+	  |= (bitset_word_t) 1 << (ch % BITSET_WORD_BITS);
+    }
+}
+
+/* Free the work areas which are only used while compiling.  */
+
+static void
+free_workarea_compile (regex_t *preg)
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  bin_tree_storage_t *chunk = dfa->str_tree_storage;
+
+  /* Walk the singly linked list of tree storage chunks, fetching the
+     successor before each chunk is released.  */
+  while (chunk != NULL)
+    {
+      bin_tree_storage_t *following = chunk->next;
+      re_free (chunk);
+      chunk = following;
+    }
+
+  /* Reset the tree bookkeeping to the "nothing allocated" state.  */
+  dfa->str_tree_storage = NULL;
+  dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
+  dfa->str_tree = NULL;
+
+  re_free (dfa->org_indices);
+  dfa->org_indices = NULL;
+}
+
+/* Create initial states for all contexts.  */
+
+static reg_errcode_t
+create_initial_state (re_dfa_t *dfa)
+{
+  /* Builds dfa->init_state and its word/newline/begbuf variants from the
+     epsilon closure of the first node of the expression.  Returns
+     REG_NOERROR on success, otherwise the error reported by the failing
+     helper.  The temporary node set is released on every path.  */
+  int first, i;
+  reg_errcode_t err;
+  re_node_set init_nodes;
+
+  /* Initial states have the epsilon closure of the node which is
+     the first node of the regular expression.  */
+  first = dfa->str_tree->first->node_idx;
+  dfa->init_node = first;
+  err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first);
+  if (BE (err != REG_NOERROR, 0))
+    return err;
+
+  /* The back-references which are in initial states can epsilon transit,
+     since in this case all of the subexpressions can be null.
+     Then we add epsilon closures of the nodes which are the next nodes of
+     the back-references.  */
+  if (dfa->nbackref > 0)
+    for (i = 0; i < init_nodes.nelem; ++i)
+      {
+	int node_idx = init_nodes.elems[i];
+	int clexp_idx, dest_idx;
+
+	if (dfa->nodes[node_idx].type != OP_BACK_REF)
+	  continue;
+
+	/* Only back references whose subexpression is closed within the
+	   initial node set are of interest.  */
+	for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx)
+	  {
+	    re_token_t *clexp_node;
+	    clexp_node = dfa->nodes + init_nodes.elems[clexp_idx];
+	    if (clexp_node->type == OP_CLOSE_SUBEXP
+		&& clexp_node->opr.idx == dfa->nodes[node_idx].opr.idx)
+	      break;
+	  }
+	if (clexp_idx == init_nodes.nelem)
+	  continue;
+
+	dest_idx = dfa->edests[node_idx].elems[0];
+	if (!re_node_set_contains (&init_nodes, dest_idx))
+	  {
+	    /* The merge can fail for lack of memory; do not go on with
+	       an incomplete initial node set.  */
+	    err = re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx);
+	    if (BE (err != REG_NOERROR, 0))
+	      goto free_return;
+	    /* The set changed, so restart the scan.  */
+	    i = 0;
+	  }
+      }
+
+  /* It must be the first time to invoke acquire_state.  */
+  dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0);
+  /* We don't check ERR here, since the initial state must not be NULL.  */
+  if (BE (dfa->init_state == NULL, 0))
+    goto free_return;
+  if (dfa->init_state->has_constraint)
+    {
+      dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes,
+						       CONTEXT_WORD);
+      dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes,
+						     CONTEXT_NEWLINE);
+      dfa->init_state_begbuf = re_acquire_state_context (&err, dfa,
+							 &init_nodes,
+							 CONTEXT_NEWLINE
+							 | CONTEXT_BEGBUF);
+      if (BE (dfa->init_state_word == NULL || dfa->init_state_nl == NULL
+	      || dfa->init_state_begbuf == NULL, 0))
+	goto free_return;
+    }
+  else
+    dfa->init_state_word = dfa->init_state_nl
+      = dfa->init_state_begbuf = dfa->init_state;
+
+  re_node_set_free (&init_nodes);
+  return REG_NOERROR;
+
+ free_return:
+  /* Failure exit: INIT_NODES is a local working set and would otherwise
+     leak.  */
+  re_node_set_free (&init_nodes);
+  return err;
+}
+
+#ifdef RE_ENABLE_I18N
+/* If it is possible to do searching in single byte encoding instead of UTF-8
+   to speed things up, set dfa->mb_cur_max to 1, clear is_utf8 and change
+   DFA nodes where needed.  */
+
+static void
+optimize_utf8 (re_dfa_t *dfa)
+{
+  int node, i, mb_chars = 0, has_period = 0;
+
+  /* First pass: decide whether the optimization is applicable.  Any
+     node that cannot be handled byte-wise makes us return with DFA
+     untouched.  Along the way, note whether there are non-ASCII
+     characters or periods that need patching in the second pass.  */
+  for (node = 0; node < dfa->nodes_len; ++node)
+    switch (dfa->nodes[node].type)
+      {
+      case CHARACTER:
+	if (dfa->nodes[node].opr.c >= 0x80)
+	  mb_chars = 1;
+	break;
+      case ANCHOR:
+	/* The kind of an ANCHOR token is kept in opr.ctx_type (the
+	   member the other ANCHOR users in this file read), so switch
+	   on that member rather than on the unrelated opr.idx.  */
+	switch (dfa->nodes[node].opr.ctx_type)
+	  {
+	  case LINE_FIRST:
+	  case LINE_LAST:
+	  case BUF_FIRST:
+	  case BUF_LAST:
+	    break;
+	  default:
+	    /* Word anchors etc. cannot be handled.  */
+	    return;
+	  }
+	break;
+      case OP_PERIOD:
+	has_period = 1;
+	break;
+      case OP_BACK_REF:
+      case OP_ALT:
+      case END_OF_RE:
+      case OP_DUP_ASTERISK:
+      case OP_OPEN_SUBEXP:
+      case OP_CLOSE_SUBEXP:
+	break;
+      case COMPLEX_BRACKET:
+	return;
+      case SIMPLE_BRACKET:
+	/* Just double check.  The non-ASCII range starts at 0x80.  */
+	assert (0x80 % BITSET_WORD_BITS == 0);
+	/* A bracket that can match any byte >= 0x80 rules out the
+	   optimization.  */
+	for (i = 0x80 / BITSET_WORD_BITS; i < BITSET_WORDS; ++i)
+	  if (dfa->nodes[node].opr.sbcset[i])
+	    return;
+	break;
+      default:
+	/* No other token type is expected at this stage.  */
+	abort ();
+      }
+
+  /* Second pass: patch the nodes noted above.  */
+  if (mb_chars || has_period)
+    for (node = 0; node < dfa->nodes_len; ++node)
+      {
+	if (dfa->nodes[node].type == CHARACTER
+	    && dfa->nodes[node].opr.c >= 0x80)
+	  dfa->nodes[node].mb_partial = 0;
+	else if (dfa->nodes[node].type == OP_PERIOD)
+	  dfa->nodes[node].type = OP_UTF8_PERIOD;
+      }
+
+  /* The search can be in single byte locale.  */
+  dfa->mb_cur_max = 1;
+  dfa->is_utf8 = 0;
+  dfa->has_mb_node = dfa->nbackref > 0 || has_period;
+}
+#endif
+
+/* Analyze the structure tree, and calculate "first", "next", "edest",
+   "eclosure", and "inveclosure".  */
+
+static reg_errcode_t
+analyze (regex_t *preg)
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  reg_errcode_t status;
+
+  /* Per-node arrays, sized like the node array itself.  */
+  dfa->nexts = re_malloc (int, dfa->nodes_alloc);
+  dfa->org_indices = re_malloc (int, dfa->nodes_alloc);
+  dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc);
+  dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc);
+  if (BE (!(dfa->nexts != NULL && dfa->org_indices != NULL
+	    && dfa->edests != NULL && dfa->eclosures != NULL), 0))
+    return REG_ESPACE;
+
+  /* The subexpression map is optional: when it cannot be allocated the
+     optimize_subexps pass is simply skipped.  */
+  dfa->subexp_map = re_malloc (int, preg->re_nsub);
+  if (dfa->subexp_map != NULL)
+    {
+      int k;
+
+      /* Start from the identity mapping and let the pass rewrite it.  */
+      for (k = 0; k < preg->re_nsub; k++)
+	dfa->subexp_map[k] = k;
+      preorder (dfa->str_tree, optimize_subexps, dfa);
+
+      /* A map that is still the identity carries no information;
+	 drop it.  */
+      k = 0;
+      while (k < preg->re_nsub && dfa->subexp_map[k] == k)
+	k++;
+      if (k == preg->re_nsub)
+	{
+	  free (dfa->subexp_map);
+	  dfa->subexp_map = NULL;
+	}
+    }
+
+  /* Tree passes, in order: lower SUBEXP nodes, compute FIRST and create
+     the automaton nodes, compute NEXT, link the nodes, then compute the
+     epsilon closures.  The first failing pass ends the analysis.  */
+  status = postorder (dfa->str_tree, lower_subexps, preg);
+  if (BE (status == REG_NOERROR, 1))
+    status = postorder (dfa->str_tree, calc_first, dfa);
+  if (BE (status != REG_NOERROR, 0))
+    return status;
+
+  preorder (dfa->str_tree, calc_next, dfa);
+
+  status = preorder (dfa->str_tree, link_nfa_nodes, dfa);
+  if (BE (status == REG_NOERROR, 1))
+    status = calc_eclosure (dfa);
+  if (BE (status != REG_NOERROR, 0))
+    return status;
+
+  /* Inverse closures are only needed by the prune_impossible_nodes pass
+     in regexec.c; computing them can be quadratic, so skip the work when
+     that pass will not run.  */
+  if (dfa->nbackref
+      || (!preg->no_sub && preg->re_nsub > 0 && dfa->has_plural_match))
+    {
+      dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len);
+      if (BE (dfa->inveclosures == NULL, 0))
+	return REG_ESPACE;
+      status = calc_inveclosure (dfa);
+    }
+
+  return status;
+}
+
+/* Our parse trees are very unbalanced, so we cannot use a stack to
+   implement parse tree visits.  Instead, we use parent pointers and
+   some hairy code in these two functions.  */
+/* Call FN (EXTRA, NODE) for every node of the tree rooted at ROOT,
+   children before their parent.  Stops at the first call that returns
+   something other than REG_NOERROR and returns that value; returns
+   REG_NOERROR once the node without a parent has been visited.  */
+static reg_errcode_t
+postorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)),
+	   void *extra)
+{
+  bin_tree_t *node, *prev;
+
+  for (node = root; ; )
+    {
+      /* Descend down the tree, preferably to the left (or to the right
+	 if that's the only child).  */
+      while (node->left || node->right)
+	if (node->left)
+          node = node->left;
+        else
+          node = node->right;
+
+      /* NODE is a leaf now.  Visit it, then climb: every ancestor whose
+	 right subtree is absent or has just been completed is visited
+	 as well.  */
+      do
+	{
+	  reg_errcode_t err = fn (extra, node);
+	  if (BE (err != REG_NOERROR, 0))
+	    return err;
+          if (node->parent == NULL)
+	    return REG_NOERROR;
+	  prev = node;
+	  node = node->parent;
+	}
+      /* Go up while we have a node that is reached from the right.  */
+      while (node->right == prev || node->right == NULL);
+      /* NODE was reached from its left child and has an unvisited right
+	 subtree: continue there.  */
+      node = node->right;
+    }
+}
+
+/* Call FN (EXTRA, NODE) for every node of the tree rooted at ROOT, each
+   node before its children and the left subtree before the right one.
+   Stops at the first call that returns something other than REG_NOERROR
+   and returns that value; otherwise returns REG_NOERROR.  */
+static reg_errcode_t
+preorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)),
+	  void *extra)
+{
+  bin_tree_t *node;
+
+  for (node = root; ; )
+    {
+      reg_errcode_t err = fn (extra, node);
+      if (BE (err != REG_NOERROR, 0))
+	return err;
+
+      /* Go to the left node, or up and to the right.  */
+      if (node->left)
+	node = node->left;
+      else
+	{
+	  /* Climb until a node is found whose right subtree exists and
+	     is not the one we just came from.  Running out of parents
+	     means the whole tree has been visited.  */
+	  bin_tree_t *prev = NULL;
+	  while (node->right == prev || node->right == NULL)
+	    {
+	      prev = node;
+	      node = node->parent;
+	      if (!node)
+	        return REG_NOERROR;
+	    }
+	  node = node->right;
+	}
+    }
+}
+
+/* Optimization pass: if a SUBEXP is entirely contained, strip it and tell
+   re_search_internal to map the inner one's opr.idx to this one's.  Adjust
+   backreferences as well.  Requires a preorder visit.  */
+static reg_errcode_t
+optimize_subexps (void *extra, bin_tree_t *node)
+{
+  re_dfa_t *dfa = (re_dfa_t *) extra;
+  bin_tree_t *inner;
+  int inner_idx;
+
+  if (node->token.type == OP_BACK_REF && dfa->subexp_map)
+    {
+      /* Redirect the back reference through the map and record that the
+	 mapped subexpression is referenced.  */
+      int mapped = dfa->subexp_map[node->token.opr.idx];
+      node->token.opr.idx = mapped;
+      dfa->used_bkref_map |= 1 << mapped;
+      return REG_NOERROR;
+    }
+
+  /* Everything below only concerns a SUBEXP whose body is itself a
+     SUBEXP.  */
+  inner = node->left;
+  if (node->token.type != SUBEXP
+      || inner == NULL
+      || inner->token.type != SUBEXP)
+    return REG_NOERROR;
+
+  inner_idx = inner->token.opr.idx;
+
+  /* Splice the inner SUBEXP out of the tree ...  */
+  node->left = inner->left;
+  if (node->left != NULL)
+    node->left->parent = node;
+
+  /* ... make its index an alias of the outer one, and clear its
+     "referenced" bit when that bit exists in the map.  */
+  dfa->subexp_map[inner_idx] = dfa->subexp_map[node->token.opr.idx];
+  if (inner_idx < BITSET_WORD_BITS)
+    dfa->used_bkref_map &= ~((bitset_word_t) 1 << inner_idx);
+
+  return REG_NOERROR;
+}
+
+/* Lowering pass: Turn each SUBEXP node into the appropriate concatenation
+   of OP_OPEN_SUBEXP, the body of the SUBEXP (if any) and OP_CLOSE_SUBEXP.  */
+static reg_errcode_t
+lower_subexps (void *extra, bin_tree_t *node)
+{
+  regex_t *preg = (regex_t *) extra;
+  reg_errcode_t err = REG_NOERROR;
+  bin_tree_t **slots[2];
+  int side;
+
+  /* Treat both children alike: left first, then right.  */
+  slots[0] = &node->left;
+  slots[1] = &node->right;
+
+  for (side = 0; side < 2; ++side)
+    {
+      bin_tree_t *child = *slots[side];
+
+      if (child == NULL || child->token.type != SUBEXP)
+	continue;
+
+      /* Replace the SUBEXP child by its lowered form and hook the
+	 replacement (if there is one) back up to NODE.  */
+      child = lower_subexp (&err, preg, child);
+      *slots[side] = child;
+      if (child != NULL)
+	child->parent = node;
+    }
+
+  return err;
+}
+
+static bin_tree_t *
+lower_subexp (reg_errcode_t *err, regex_t *preg, bin_tree_t *node)
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  bin_tree_t *inner = node->left;
+  bin_tree_t *open_tok, *close_tok, *tail, *seq;
+
+  /* When no submatches are reported, a subexpression that no back
+     reference uses can be replaced by its body.  Empty subexpressions
+     are not optimized, because that could leave CONCAT nodes with NULL
+     children (the sed "script" /\(\)/x triggers this case).  */
+  if (preg->no_sub && inner != NULL)
+    {
+      int referenced
+	= (node->token.opr.idx < BITSET_WORD_BITS
+	   && (dfa->used_bkref_map
+	       & ((bitset_word_t) 1 << node->token.opr.idx)) != 0);
+      if (!referenced)
+	return inner;
+    }
+
+  /* Build  CONCAT (OPEN, CONCAT (body, CLOSE)),  or just
+     CONCAT (OPEN, CLOSE)  when there is no body.  */
+  open_tok = create_tree (dfa, NULL, NULL, OP_OPEN_SUBEXP);
+  close_tok = create_tree (dfa, NULL, NULL, OP_CLOSE_SUBEXP);
+  if (inner != NULL)
+    tail = create_tree (dfa, inner, close_tok, CONCAT);
+  else
+    tail = close_tok;
+  seq = create_tree (dfa, open_tok, tail, CONCAT);
+
+  /* The four results are checked together, after all the calls.  */
+  if (BE (open_tok == NULL || close_tok == NULL
+	  || tail == NULL || seq == NULL, 0))
+    {
+      *err = REG_ESPACE;
+      return NULL;
+    }
+
+  /* Both delimiters inherit the index and the optional flag of the
+     SUBEXP they replace.  */
+  close_tok->token.opr.idx = node->token.opr.idx;
+  open_tok->token.opr.idx = close_tok->token.opr.idx;
+  close_tok->token.opt_subexp = node->token.opt_subexp;
+  open_tok->token.opt_subexp = close_tok->token.opt_subexp;
+  return seq;
+}
+
+/* Pass 1 in building the NFA: compute FIRST and create unlinked automaton
+   nodes.  Requires a postorder visit.  */
+static reg_errcode_t
+calc_first (void *extra, bin_tree_t *node)
+{
+  re_dfa_t *dfa = (re_dfa_t *) extra;
+
+  if (node->token.type != CONCAT)
+    {
+      /* Every non-CONCAT tree node gets an automaton node of its own and
+	 is its own FIRST.  */
+      node->first = node;
+      node->node_idx = re_dfa_add_node (dfa, node->token);
+      return BE (node->node_idx == -1, 0) ? REG_ESPACE : REG_NOERROR;
+    }
+
+  /* A concatenation starts where its left operand starts; the postorder
+     visit guarantees the left operand has been handled already.  */
+  node->first = node->left->first;
+  node->node_idx = node->left->node_idx;
+  return REG_NOERROR;
+}
+
+/* Pass 2: compute NEXT on the tree.  Preorder visit.  */
+static reg_errcode_t
+calc_next (void *extra, bin_tree_t *node)
+{
+  bin_tree_t *lhs = node->left;
+  bin_tree_t *rhs = node->right;
+
+  if (node->token.type == OP_DUP_ASTERISK)
+    {
+      /* The body of a repetition loops back to the repetition node.  */
+      lhs->next = node;
+    }
+  else if (node->token.type == CONCAT)
+    {
+      /* Left is followed by the start of right; right inherits what
+	 follows the concatenation as a whole.  */
+      lhs->next = rhs->first;
+      rhs->next = node->next;
+    }
+  else
+    {
+      /* Any other node hands its own NEXT down to whatever children it
+	 has.  */
+      if (lhs != NULL)
+	lhs->next = node->next;
+      if (rhs != NULL)
+	rhs->next = node->next;
+    }
+
+  return REG_NOERROR;
+}
+
+/* Pass 3: link all DFA nodes to their NEXT node (any order will do).  */
+static reg_errcode_t
+link_nfa_nodes (void *extra, bin_tree_t *node)
+{
+  /* Records, for the automaton node that belongs to tree node NODE, its
+     successor in dfa->nexts and/or its epsilon destinations in
+     dfa->edests.  EXTRA is the re_dfa_t.  Returns REG_NOERROR, or the
+     error reported while initializing a destination set.  */
+  re_dfa_t *dfa = (re_dfa_t *) extra;
+  int idx = node->node_idx;
+  reg_errcode_t err = REG_NOERROR;
+
+  switch (node->token.type)
+    {
+    case CONCAT:
+      /* Nothing to link here; CONCAT only takes over the node index of
+	 its left operand in calc_first.  */
+      break;
+
+    case END_OF_RE:
+      assert (node->next == NULL);
+      break;
+
+    case OP_DUP_ASTERISK:
+    case OP_ALT:
+      {
+	/* Two epsilon destinations: one per operand.  A missing operand
+	   is replaced by whatever follows this node.  */
+	int left, right;
+	dfa->has_plural_match = 1;
+	if (node->left != NULL)
+	  left = node->left->first->node_idx;
+	else
+	  left = node->next->node_idx;
+	if (node->right != NULL)
+	  right = node->right->first->node_idx;
+	else
+	  right = node->next->node_idx;
+	assert (left > -1);
+	assert (right > -1);
+	err = re_node_set_init_2 (dfa->edests + idx, left, right);
+      }
+      break;
+
+    case ANCHOR:
+    case OP_OPEN_SUBEXP:
+    case OP_CLOSE_SUBEXP:
+      err = re_node_set_init_1 (dfa->edests + idx, node->next->node_idx);
+      break;
+
+    case OP_BACK_REF:
+      /* A back reference gets both a regular successor and an epsilon
+	 edge to it.  The set initialization can fail, so its result
+	 must be propagated to the caller.  */
+      dfa->nexts[idx] = node->next->node_idx;
+      err = re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]);
+      break;
+
+    default:
+      assert (!IS_EPSILON_NODE (node->token.type));
+      dfa->nexts[idx] = node->next->node_idx;
+      break;
+    }
+
+  return err;
+}
+
+/* Duplicate the epsilon closure of the node ROOT_NODE.
+   Note that duplicated nodes have constraint INIT_CONSTRAINT in addition
+   to their own constraint.  */
+
+static reg_errcode_t
+internal_function
+duplicate_node_closure (re_dfa_t *dfa, int top_org_node, int top_clone_node,
+			int root_node, unsigned int init_constraint)
+{
+  /* Walks the epsilon transitions that start at TOP_ORG_NODE and mirrors
+     each step onto clones, the first of which is TOP_CLONE_NODE.
+     CONSTRAINT starts as INIT_CONSTRAINT and accumulates the context
+     type of every ANCHOR passed on the way.  Returns REG_NOERROR, or
+     REG_ESPACE when a node cannot be duplicated or inserted (an error
+     from the recursive call is passed through unchanged).  */
+  int org_node, clone_node, ret;
+  unsigned int constraint = init_constraint;
+  /* ORG_NODE and CLONE_NODE advance in lock step; the loop is only left
+     through `break' or `return'.  */
+  for (org_node = top_org_node, clone_node = top_clone_node;;)
+    {
+      int org_dest, clone_dest;
+      if (dfa->nodes[org_node].type == OP_BACK_REF)
+	{
+	  /* If the back reference epsilon-transit, its destination must
+	     also have the constraint.  Then duplicate the epsilon closure
+	     of the destination of the back reference, and store it in
+	     edests of the back reference.  */
+	  org_dest = dfa->nexts[org_node];
+	  re_node_set_empty (dfa->edests + clone_node);
+	  clone_dest = duplicate_node (dfa, org_dest, constraint);
+	  if (BE (clone_dest == -1, 0))
+	    return REG_ESPACE;
+	  dfa->nexts[clone_node] = dfa->nexts[org_node];
+	  ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+	  if (BE (ret < 0, 0))
+	    return REG_ESPACE;
+	}
+      else if (dfa->edests[org_node].nelem == 0)
+	{
+	  /* In case of the node can't epsilon-transit, don't duplicate the
+	     destination and store the original destination as the
+	     destination of the node.  */
+	  dfa->nexts[clone_node] = dfa->nexts[org_node];
+	  break;
+	}
+      else if (dfa->edests[org_node].nelem == 1)
+	{
+	  /* In case of the node can epsilon-transit, and it has only one
+	     destination.  */
+	  org_dest = dfa->edests[org_node].elems[0];
+	  re_node_set_empty (dfa->edests + clone_node);
+	  if (dfa->nodes[org_node].type == ANCHOR)
+	    {
+	      /* In case of the node has another constraint, append it.  */
+	      if (org_node == root_node && clone_node != org_node)
+		{
+		  /* ...but if the node is root_node itself, it means the
+		     epsilon closure have a loop, then tie it to the
+		     destination of the root_node.  */
+		  ret = re_node_set_insert (dfa->edests + clone_node,
+					    org_dest);
+		  if (BE (ret < 0, 0))
+		    return REG_ESPACE;
+		  break;
+		}
+	      constraint |= dfa->nodes[org_node].opr.ctx_type;
+	    }
+	  clone_dest = duplicate_node (dfa, org_dest, constraint);
+	  if (BE (clone_dest == -1, 0))
+	    return REG_ESPACE;
+	  ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+	  if (BE (ret < 0, 0))
+	    return REG_ESPACE;
+	}
+      else /* dfa->edests[org_node].nelem == 2 */
+	{
+	  /* In case of the node can epsilon-transit, and it has two
+	     destinations. In the bin_tree_t and DFA, that's '|' and '*'.   */
+	  org_dest = dfa->edests[org_node].elems[0];
+	  re_node_set_empty (dfa->edests + clone_node);
+	  /* Search for a duplicated node which satisfies the constraint.  */
+	  clone_dest = search_duplicated_node (dfa, org_dest, constraint);
+	  if (clone_dest == -1)
+	    {
+	      /* There are no such a duplicated node, create a new one.  */
+	      reg_errcode_t err;
+	      clone_dest = duplicate_node (dfa, org_dest, constraint);
+	      if (BE (clone_dest == -1, 0))
+		return REG_ESPACE;
+	      ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+	      if (BE (ret < 0, 0))
+		return REG_ESPACE;
+	      /* The first destination is followed recursively; the second
+		 one is followed by the enclosing loop.  */
+	      err = duplicate_node_closure (dfa, org_dest, clone_dest,
+					    root_node, constraint);
+	      if (BE (err != REG_NOERROR, 0))
+		return err;
+	    }
+	  else
+	    {
+	      /* There are a duplicated node which satisfy the constraint,
+		 use it to avoid infinite loop.  */
+	      ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+	      if (BE (ret < 0, 0))
+		return REG_ESPACE;
+	    }
+
+	  /* Second destination: always freshly duplicated.  */
+	  org_dest = dfa->edests[org_node].elems[1];
+	  clone_dest = duplicate_node (dfa, org_dest, constraint);
+	  if (BE (clone_dest == -1, 0))
+	    return REG_ESPACE;
+	  ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+	  if (BE (ret < 0, 0))
+	    return REG_ESPACE;
+	}
+      /* Step to the destination that was handled last.  */
+      org_node = org_dest;
+      clone_node = clone_dest;
+    }
+  return REG_NOERROR;
+}
+
+/* Scan backwards from the last node for a duplicate of ORG_NODE that carries
+   exactly CONSTRAINT; the scan stops at the first non-duplicated node.
+   Return the index found, or -1 if there is none.  */
+
+static int
+search_duplicated_node (const re_dfa_t *dfa, int org_node,
+			unsigned int constraint)
+{
+  int idx;
+  /* Test IDX before indexing so nodes[-1] is never read when nodes_len is 0.  */
+  for (idx = dfa->nodes_len - 1; idx > 0 && dfa->nodes[idx].duplicated; --idx)
+    if (org_node == dfa->org_indices[idx]
+	&& constraint == dfa->nodes[idx].constraint)
+      return idx; /* Found.  */
+  return -1; /* Not found.  */
+}
+
+/* Append a copy of node ORG_IDX to DFA, tag the copy as duplicated, and
+   give it CONSTRAINT (OR-ed with the anchor context when the original is
+   an ANCHOR).  Return the copy's index, or -1 if re_dfa_add_node fails.  */
+
+static int
+duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint)
+{
+  int new_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]);
+  if (BE (new_idx == -1, 0))
+    return -1;
+  /* An anchor contributes its own context bits on top of CONSTRAINT.  */
+  if (dfa->nodes[org_idx].type == ANCHOR)
+    constraint |= dfa->nodes[org_idx].opr.ctx_type;
+  dfa->nodes[new_idx].constraint = constraint;
+  dfa->nodes[new_idx].duplicated = 1;
+
+  /* Remember which node this one was cloned from.  */
+  dfa->org_indices[new_idx] = org_idx;
+  return new_idx;
+}
+
+/* Build the inverse epsilon closures: for every node N in eclosures[SRC],
+   append SRC to inveclosures[N].  Return REG_ESPACE if an append fails.  */
+static reg_errcode_t
+calc_inveclosure (re_dfa_t *dfa)
+{
+  int node, i;
+  for (node = 0; node < dfa->nodes_len; ++node)
+    re_node_set_init_empty (&dfa->inveclosures[node]);
+
+  for (node = 0; node < dfa->nodes_len; ++node)
+    {
+      const re_node_set *closure = &dfa->eclosures[node];
+      for (i = 0; i < closure->nelem; ++i)
+	if (BE (re_node_set_insert_last (&dfa->inveclosures[closure->elems[i]],
+					 node) == -1, 0))
+	  return REG_ESPACE;
+    }
+
+  return REG_NOERROR;
+}
+
+/* Calculate the epsilon closure of every node in DFA, rescanning the node
+   array while a pass still leaves some closure empty.  */
+static reg_errcode_t
+calc_eclosure (re_dfa_t *dfa)
+{
+  int node_idx, incomplete;
+#ifdef DEBUG
+  assert (dfa->nodes_len > 0);
+#endif
+  incomplete = 0;
+  /* For each node, calculate its epsilon closure.  */
+  for (node_idx = 0; ; ++node_idx)
+    {
+      reg_errcode_t err;
+      re_node_set eclosure_elem;
+      if (node_idx == dfa->nodes_len)
+	{
+	  if (!incomplete)
+	    break;
+	  incomplete = 0;
+	  node_idx = 0; /* Start another pass over the nodes.  */
+	}
+
+#ifdef DEBUG
+      assert (dfa->eclosures[node_idx].nelem != -1);
+#endif
+
+      /* If we have already calculated, skip it.  */
+      if (dfa->eclosures[node_idx].nelem != 0)
+	continue;
+      /* Calculate epsilon closure of `node_idx'.  */
+      err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, 1);
+      if (BE (err != REG_NOERROR, 0))
+	return err;
+
+      if (dfa->eclosures[node_idx].nelem == 0)
+	{
+	  incomplete = 1;
+	  re_node_set_free (&eclosure_elem);
+	}
+    }
+  return REG_NOERROR;
+}
+
+/* Calculate the epsilon closure of NODE and store it in *NEW_SET.  A non-ROOT
+   call whose result is incomplete leaves eclosures[NODE] empty (nelem 0).
+   Return REG_NOERROR, or an error code if a set operation fails.  */
+static reg_errcode_t
+calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, int node, int root)
+{
+  reg_errcode_t err;
+  unsigned int constraint;
+  int i, incomplete = 0;
+  re_node_set eclosure;
+  err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1);
+  if (BE (err != REG_NOERROR, 0))
+    return err;
+
+  /* nelem == -1 marks NODE as in progress, which lets us detect cycles.  */
+  dfa->eclosures[node].nelem = -1;
+
+  constraint = ((dfa->nodes[node].type == ANCHOR)
+		? dfa->nodes[node].opr.ctx_type : 0);
+  /* A constrained node's successors are duplicated so they inherit it.  */
+  if (constraint
+      && dfa->edests[node].nelem
+      && !dfa->nodes[dfa->edests[node].elems[0]].duplicated)
+    {
+      err = duplicate_node_closure (dfa, node, node, node, constraint);
+      if (BE (err != REG_NOERROR, 0))
+	return err;
+    }
+
+  /* Expand each epsilon destination node.  */
+  if (IS_EPSILON_NODE(dfa->nodes[node].type))
+    for (i = 0; i < dfa->edests[node].nelem; ++i)
+      {
+	re_node_set eclosure_elem;
+	int edest = dfa->edests[node].elems[i];
+	/* If calculating the epsilon closure of `edest' is in progress,
+	   return intermediate result.  */
+	if (dfa->eclosures[edest].nelem == -1)
+	  {
+	    incomplete = 1;
+	    continue;
+	  }
+	/* If we haven't calculated the epsilon closure of `edest' yet,
+	   calculate now. Otherwise use calculated epsilon closure.  */
+	if (dfa->eclosures[edest].nelem == 0)
+	  {
+	    err = calc_eclosure_iter (&eclosure_elem, dfa, edest, 0);
+	    if (BE (err != REG_NOERROR, 0))
+	      return err;
+	  }
+	else
+	  eclosure_elem = dfa->eclosures[edest];
+	/* Merge the closure of `edest'; a failed merge must not be ignored.  */
+	err = re_node_set_merge (&eclosure, &eclosure_elem);
+	if (BE (err != REG_NOERROR, 0))
+	  return err;
+	/* An incomplete `edest' closure makes this one incomplete too.  */
+	if (dfa->eclosures[edest].nelem == 0)
+	  {
+	    incomplete = 1;
+	    re_node_set_free (&eclosure_elem);
+	  }
+      }
+
+  /* Epsilon closures include itself.  */
+  if (BE (re_node_set_insert (&eclosure, node) < 0, 0))
+    return REG_ESPACE;
+  if (incomplete && !root)
+    dfa->eclosures[node].nelem = 0;
+  else
+    dfa->eclosures[node] = eclosure;
+  *new_set = eclosure;
+  return REG_NOERROR;
+}
+
+/* Functions for token which are used in the parser.  */
+
+/* Read the token at the current position of INPUT into RESULT and advance
+   INPUT past it.  Not for use inside bracket expressions.  */
+static void
+internal_function
+fetch_token (re_token_t *result, re_string_t *input, reg_syntax_t syntax)
+{
+  int token_len = peek_token (result, input, syntax);
+  re_string_skip_bytes (input, token_len);
+}
+
+/* Classify the token at the current position of INPUT into TOKEN, leaving
+   INPUT's position unchanged on return.  Return the token's length in bytes
+   (0 at end of input).  Do not use this inside bracket expressions.  */
+static int
+internal_function
+peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
+{
+  unsigned char c;
+
+  if (re_string_eoi (input))
+    {
+      token->type = END_OF_RE;
+      return 0;
+    }
+
+  c = re_string_peek_byte (input, 0);
+  token->opr.c = c;
+
+  token->word_char = 0;
+#ifdef RE_ENABLE_I18N
+  token->mb_partial = 0;
+  if (input->mb_cur_max > 1 &&
+      !re_string_first_byte (input, re_string_cur_idx (input)))
+    {
+      token->type = CHARACTER;
+      token->mb_partial = 1;
+      return 1;
+    }
+#endif
+  if (c == '\\')
+    {
+      unsigned char c2;
+      if (re_string_cur_idx (input) + 1 >= re_string_length (input))
+	{
+	  token->type = BACK_SLASH; /* Backslash is the last byte.  */
+	  return 1;
+	}
+
+      c2 = re_string_peek_byte_case (input, 1);
+      token->opr.c = c2;
+      token->type = CHARACTER;
+#ifdef RE_ENABLE_I18N
+      if (input->mb_cur_max > 1)
+	{
+	  wint_t wc = re_string_wchar_at (input,
+					  re_string_cur_idx (input) + 1);
+	  token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
+	}
+      else
+#endif
+	token->word_char = IS_WORD_CHAR (c2) != 0;
+
+      switch (c2)
+	{
+	case '|':
+	  if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_NO_BK_VBAR))
+	    token->type = OP_ALT;
+	  break;
+	case '1': case '2': case '3': case '4': case '5':
+	case '6': case '7': case '8': case '9':
+	  if (!(syntax & RE_NO_BK_REFS))
+	    {
+	      token->type = OP_BACK_REF;
+	      token->opr.idx = c2 - '1'; /* '1'..'9' become 0..8.  */
+	    }
+	  break;
+	case '<':
+	  if (!(syntax & RE_NO_GNU_OPS))
+	    {
+	      token->type = ANCHOR;
+	      token->opr.ctx_type = WORD_FIRST;
+	    }
+	  break;
+	case '>':
+	  if (!(syntax & RE_NO_GNU_OPS))
+	    {
+	      token->type = ANCHOR;
+	      token->opr.ctx_type = WORD_LAST;
+	    }
+	  break;
+	case 'b':
+	  if (!(syntax & RE_NO_GNU_OPS))
+	    {
+	      token->type = ANCHOR;
+	      token->opr.ctx_type = WORD_DELIM;
+	    }
+	  break;
+	case 'B':
+	  if (!(syntax & RE_NO_GNU_OPS))
+	    {
+	      token->type = ANCHOR;
+	      token->opr.ctx_type = NOT_WORD_DELIM;
+	    }
+	  break;
+	case 'w':
+	  if (!(syntax & RE_NO_GNU_OPS))
+	    token->type = OP_WORD;
+	  break;
+	case 'W':
+	  if (!(syntax & RE_NO_GNU_OPS))
+	    token->type = OP_NOTWORD;
+	  break;
+	case 's':
+	  if (!(syntax & RE_NO_GNU_OPS))
+	    token->type = OP_SPACE;
+	  break;
+	case 'S':
+	  if (!(syntax & RE_NO_GNU_OPS))
+	    token->type = OP_NOTSPACE;
+	  break;
+	case '`':
+	  if (!(syntax & RE_NO_GNU_OPS))
+	    {
+	      token->type = ANCHOR;
+	      token->opr.ctx_type = BUF_FIRST;
+	    }
+	  break;
+	case '\'':
+	  if (!(syntax & RE_NO_GNU_OPS))
+	    {
+	      token->type = ANCHOR;
+	      token->opr.ctx_type = BUF_LAST;
+	    }
+	  break;
+	case '(':
+	  if (!(syntax & RE_NO_BK_PARENS))
+	    token->type = OP_OPEN_SUBEXP;
+	  break;
+	case ')':
+	  if (!(syntax & RE_NO_BK_PARENS))
+	    token->type = OP_CLOSE_SUBEXP;
+	  break;
+	case '+':
+	  if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
+	    token->type = OP_DUP_PLUS;
+	  break;
+	case '?':
+	  if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
+	    token->type = OP_DUP_QUESTION;
+	  break;
+	case '{':
+	  if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
+	    token->type = OP_OPEN_DUP_NUM;
+	  break;
+	case '}':
+	  if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
+	    token->type = OP_CLOSE_DUP_NUM;
+	  break;
+	default:
+	  break;
+	}
+      return 2; /* The backslash plus the byte after it.  */
+    }
+
+  token->type = CHARACTER;
+#ifdef RE_ENABLE_I18N
+  if (input->mb_cur_max > 1)
+    {
+      wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input));
+      token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
+    }
+  else
+#endif
+    token->word_char = IS_WORD_CHAR (token->opr.c);
+
+  switch (c)
+    {
+    case '\n':
+      if (syntax & RE_NEWLINE_ALT)
+	token->type = OP_ALT;
+      break;
+    case '|':
+      if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_NO_BK_VBAR))
+	token->type = OP_ALT;
+      break;
+    case '*':
+      token->type = OP_DUP_ASTERISK;
+      break;
+    case '+':
+      if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
+	token->type = OP_DUP_PLUS;
+      break;
+    case '?':
+      if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
+	token->type = OP_DUP_QUESTION;
+      break;
+    case '{':
+      if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
+	token->type = OP_OPEN_DUP_NUM;
+      break;
+    case '}':
+      if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
+	token->type = OP_CLOSE_DUP_NUM;
+      break;
+    case '(':
+      if (syntax & RE_NO_BK_PARENS)
+	token->type = OP_OPEN_SUBEXP;
+      break;
+    case ')':
+      if (syntax & RE_NO_BK_PARENS)
+	token->type = OP_CLOSE_SUBEXP;
+      break;
+    case '[':
+      token->type = OP_OPEN_BRACKET;
+      break;
+    case '.':
+      token->type = OP_PERIOD;
+      break;
+    case '^':
+      if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS | RE_CARET_ANCHORS_HERE)) &&
+	  re_string_cur_idx (input) != 0)
+	{
+	  char prev = re_string_peek_byte (input, -1);
+	  if (!(syntax & RE_NEWLINE_ALT) || prev != '\n')
+	    break;
+	}
+      token->type = ANCHOR;
+      token->opr.ctx_type = LINE_FIRST;
+      break;
+    case '$':
+      if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) &&
+	  re_string_cur_idx (input) + 1 != re_string_length (input))
+	{
+	  re_token_t next;
+	  re_string_skip_bytes (input, 1); /* Look at what follows `$'.  */
+	  peek_token (&next, input, syntax);
+	  re_string_skip_bytes (input, -1); /* Restore the position.  */
+	  if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP)
+	    break;
+	}
+      token->type = ANCHOR;
+      token->opr.ctx_type = LINE_LAST;
+      break;
+    default:
+      break;
+    }
+  return 1;
+}
+
+/* Classify the token at the current position of INPUT, which must be inside
+   a bracket expression, into TOKEN and return its length in bytes.  Note
+   that the backslash-escape path below advances INPUT by one byte itself.  */
+static int
+internal_function
+peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
+{
+  unsigned char c;
+  if (re_string_eoi (input))
+    {
+      token->type = END_OF_RE;
+      return 0;
+    }
+  c = re_string_peek_byte (input, 0);
+  token->opr.c = c;
+
+#ifdef RE_ENABLE_I18N
+  if (input->mb_cur_max > 1 &&
+      !re_string_first_byte (input, re_string_cur_idx (input)))
+    {
+      token->type = CHARACTER;
+      return 1;
+    }
+#endif /* RE_ENABLE_I18N */
+
+  if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS)
+      && re_string_cur_idx (input) + 1 < re_string_length (input))
+    {
+      /* In this case, '\' escapes a character.  */
+      unsigned char c2;
+      re_string_skip_bytes (input, 1); /* Step over the backslash.  */
+      c2 = re_string_peek_byte (input, 0);
+      token->opr.c = c2;
+      token->type = CHARACTER;
+      return 1;
+    }
+  if (c == '[') /* '[' is a special char in a bracket exps.  */
+    {
+      unsigned char c2;
+      int token_len;
+      if (re_string_cur_idx (input) + 1 < re_string_length (input))
+	c2 = re_string_peek_byte (input, 1);
+      else
+	c2 = 0;
+      token->opr.c = c2;
+      token_len = 2;
+      switch (c2)
+	{
+	case '.':
+	  token->type = OP_OPEN_COLL_ELEM;
+	  break;
+	case '=':
+	  token->type = OP_OPEN_EQUIV_CLASS;
+	  break;
+	case ':':
+	  if (syntax & RE_CHAR_CLASSES)
+	    {
+	      token->type = OP_OPEN_CHAR_CLASS;
+	      break;
+	    }
+	  /* else fall through.  */
+	default:
+	  token->type = CHARACTER; /* A plain '[' of length 1.  */
+	  token->opr.c = c;
+	  token_len = 1;
+	  break;
+	}
+      return token_len;
+    }
+  switch (c)
+    {
+    case '-':
+      token->type = OP_CHARSET_RANGE;
+      break;
+    case ']':
+      token->type = OP_CLOSE_BRACKET;
+      break;
+    case '^':
+      token->type = OP_NON_MATCH_LIST;
+      break;
+    default:
+      token->type = CHARACTER;
+    }
+  return 1;
+}
+
+/* Functions for parser.  */
+
+/* Entry point of the parser.
+   Parse the regular expression REGEXP and return the structure tree.
+   If an error occurs, *ERR is set to the error code and NULL is returned.
+   This function builds the following tree from regular expression <reg_exp>:
+	   CAT
+	   / \
+	  /   \
+   <reg_exp>  EOR
+
+   CAT means concatenation.
+   EOR means end of regular expression.  */
+
+static bin_tree_t *
+parse (re_string_t *regexp, regex_t *preg, reg_syntax_t syntax,
+       reg_errcode_t *err)
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  bin_tree_t *body, *end_node, *top;
+  re_token_t tok;
+
+  dfa->syntax = syntax;
+  fetch_token (&tok, regexp, syntax | RE_CARET_ANCHORS_HERE);
+  body = parse_reg_exp (regexp, preg, &tok, syntax, 0, err);
+  if (BE (body == NULL && *err != REG_NOERROR, 0))
+    return NULL;
+
+  /* When the expression produced no tree, the root is just the EOR node.  */
+  end_node = create_tree (dfa, NULL, NULL, END_OF_RE);
+  top = (body != NULL) ? create_tree (dfa, body, end_node, CONCAT) : end_node;
+  if (BE (end_node == NULL || top == NULL, 0))
+    {
+      *err = REG_ESPACE;
+      return NULL;
+    }
+  return top;
+}
+
+/* Parse alternatives separated by `|'.  This function builds the following
+   tree from regular expression <branch1>|<branch2>:
+	   ALT
+	   / \
+	  /   \
+   <branch1> <branch2>
+
+   ALT means alternative, which represents the operator `|'.  */
+
+static bin_tree_t *
+parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
+	       reg_syntax_t syntax, int nest, reg_errcode_t *err)
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  bin_tree_t *alt, *rhs;
+  alt = parse_branch (regexp, preg, token, syntax, nest, err);
+  if (BE (alt == NULL && *err != REG_NOERROR, 0))
+    return NULL;
+
+  while (token->type == OP_ALT)
+    {
+      /* The alternative after `|' may be empty, hence the NULL default.  */
+      rhs = NULL;
+      fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE);
+      if (!(token->type == OP_ALT || token->type == END_OF_RE
+	    || (nest != 0 && token->type == OP_CLOSE_SUBEXP)))
+	{
+	  rhs = parse_branch (regexp, preg, token, syntax, nest, err);
+	  if (BE (rhs == NULL && *err != REG_NOERROR, 0))
+	    return NULL;
+	}
+      alt = create_tree (dfa, alt, rhs, OP_ALT);
+      if (BE (alt == NULL, 0))
+	{
+	  *err = REG_ESPACE;
+	  return NULL;
+	}
+    }
+  return alt;
+}
+
+/* Parse a sequence of expressions.  This function builds the following
+   tree from regular expression <exp1><exp2>:
+	CAT
+	/ \
+       /   \
+   <exp1> <exp2>
+
+   CAT means concatenation.  */
+
+static bin_tree_t *
+parse_branch (re_string_t *regexp, regex_t *preg, re_token_t *token,
+	      reg_syntax_t syntax, int nest, reg_errcode_t *err)
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  bin_tree_t *seq, *next;
+  seq = parse_expression (regexp, preg, token, syntax, nest, err);
+  if (BE (seq == NULL && *err != REG_NOERROR, 0))
+    return NULL;
+
+  /* Stop at `|', at the end of the pattern, or at a group's closing `)'.  */
+  while (!(token->type == OP_ALT || token->type == END_OF_RE
+	   || (nest != 0 && token->type == OP_CLOSE_SUBEXP)))
+    {
+      next = parse_expression (regexp, preg, token, syntax, nest, err);
+      if (BE (next == NULL && *err != REG_NOERROR, 0))
+	return NULL;
+      if (next == NULL)
+	continue; /* Nothing to append.  */
+      if (seq == NULL)
+	{
+	  seq = next;
+	  continue;
+	}
+      seq = create_tree (dfa, seq, next, CONCAT);
+      if (seq == NULL)
+	{
+	  *err = REG_ESPACE;
+	  return NULL;
+	}
+    }
+  return seq;
+}
+
+/* Parse one expression (an atom plus any repetition operators that follow
+   it) starting at TOKEN.  For the regular expression a* this builds:
+	 *
+	 |
+	 a
+   A NULL return with *ERR left alone means no tree was produced.  */
+static bin_tree_t *
+parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
+		  reg_syntax_t syntax, int nest, reg_errcode_t *err)
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  bin_tree_t *tree;
+  switch (token->type)
+    {
+    case CHARACTER:
+      tree = create_token_tree (dfa, NULL, NULL, token);
+      if (BE (tree == NULL, 0))
+	{
+	  *err = REG_ESPACE;
+	  return NULL;
+	}
+#ifdef RE_ENABLE_I18N
+      if (dfa->mb_cur_max > 1)
+	{
+	  while (!re_string_eoi (regexp)
+		 && !re_string_first_byte (regexp, re_string_cur_idx (regexp)))
+	    {
+	      bin_tree_t *mbc_remain;
+	      fetch_token (token, regexp, syntax);
+	      mbc_remain = create_token_tree (dfa, NULL, NULL, token);
+	      tree = create_tree (dfa, tree, mbc_remain, CONCAT);
+	      if (BE (mbc_remain == NULL || tree == NULL, 0))
+		{
+		  *err = REG_ESPACE;
+		  return NULL;
+		}
+	    }
+	}
+#endif
+      break;
+    case OP_OPEN_SUBEXP:
+      tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err);
+      if (BE (*err != REG_NOERROR && tree == NULL, 0))
+	return NULL;
+      break;
+    case OP_OPEN_BRACKET:
+      tree = parse_bracket_exp (regexp, dfa, token, syntax, err);
+      if (BE (*err != REG_NOERROR && tree == NULL, 0))
+	return NULL;
+      break;
+    case OP_BACK_REF:
+      if (!BE (dfa->completed_bkref_map & (1 << token->opr.idx), 1))
+	{
+	  *err = REG_ESUBREG; /* The referenced group is not complete.  */
+	  return NULL;
+	}
+      dfa->used_bkref_map |= 1 << token->opr.idx;
+      tree = create_token_tree (dfa, NULL, NULL, token);
+      if (BE (tree == NULL, 0))
+	{
+	  *err = REG_ESPACE;
+	  return NULL;
+	}
+      ++dfa->nbackref;
+      dfa->has_mb_node = 1;
+      break;
+    case OP_OPEN_DUP_NUM:
+      if (syntax & RE_CONTEXT_INVALID_DUP)
+	{
+	  *err = REG_BADRPT;
+	  return NULL;
+	}
+      /* FALLTHROUGH */
+    case OP_DUP_ASTERISK:
+    case OP_DUP_PLUS:
+    case OP_DUP_QUESTION:
+      if (syntax & RE_CONTEXT_INVALID_OPS)
+	{
+	  *err = REG_BADRPT;
+	  return NULL;
+	}
+      else if (syntax & RE_CONTEXT_INDEP_OPS)
+	{
+	  fetch_token (token, regexp, syntax);
+	  return parse_expression (regexp, preg, token, syntax, nest, err);
+	}
+      /* else fall through  */
+    case OP_CLOSE_SUBEXP:
+      if ((token->type == OP_CLOSE_SUBEXP) &&
+	  !(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD))
+	{
+	  *err = REG_ERPAREN;
+	  return NULL;
+	}
+      /* else fall through  */
+    case OP_CLOSE_DUP_NUM:
+      /* We treat it as a normal character.  */
+
+      /* Then we can treat these characters as normal characters.  */
+      token->type = CHARACTER;
+      /* mb_partial and word_char bits should be initialized already
+	 by peek_token.  */
+      tree = create_token_tree (dfa, NULL, NULL, token);
+      if (BE (tree == NULL, 0))
+	{
+	  *err = REG_ESPACE;
+	  return NULL;
+	}
+      break;
+    case ANCHOR:
+      if ((token->opr.ctx_type
+	   & (WORD_DELIM | NOT_WORD_DELIM | WORD_FIRST | WORD_LAST))
+	  && dfa->word_ops_used == 0)
+	init_word_char (dfa);
+      if (token->opr.ctx_type == WORD_DELIM
+          || token->opr.ctx_type == NOT_WORD_DELIM)
+	{
+	  /* These two anchors are built as an OP_ALT of two simpler ones.  */
+	  bin_tree_t *tree_first, *tree_last;
+	  if (token->opr.ctx_type == WORD_DELIM)
+	    {
+	      token->opr.ctx_type = WORD_FIRST;
+	      tree_first = create_token_tree (dfa, NULL, NULL, token);
+	      token->opr.ctx_type = WORD_LAST;
+            }
+          else
+            {
+	      token->opr.ctx_type = INSIDE_WORD;
+	      tree_first = create_token_tree (dfa, NULL, NULL, token);
+	      token->opr.ctx_type = INSIDE_NOTWORD;
+            }
+	  tree_last = create_token_tree (dfa, NULL, NULL, token);
+	  tree = create_tree (dfa, tree_first, tree_last, OP_ALT);
+	  if (BE (tree_first == NULL || tree_last == NULL || tree == NULL, 0))
+	    {
+	      *err = REG_ESPACE;
+	      return NULL;
+	    }
+	}
+      else
+	{
+	  tree = create_token_tree (dfa, NULL, NULL, token);
+	  if (BE (tree == NULL, 0))
+	    {
+	      *err = REG_ESPACE;
+	      return NULL;
+	    }
+	}
+      /* We must return here, since ANCHORs can't be followed
+	 by repetition operators.
+	 eg. RE"^*" is invalid or "<ANCHOR(^)><CHAR(*)>",
+	     it must not be "<ANCHOR(^)><REPEAT(*)>".  */
+      fetch_token (token, regexp, syntax);
+      return tree;
+    case OP_PERIOD:
+      tree = create_token_tree (dfa, NULL, NULL, token);
+      if (BE (tree == NULL, 0))
+	{
+	  *err = REG_ESPACE;
+	  return NULL;
+	}
+      if (dfa->mb_cur_max > 1)
+	dfa->has_mb_node = 1;
+      break;
+    case OP_WORD:
+    case OP_NOTWORD:
+      tree = build_charclass_op (dfa, regexp->trans,
+				 (const unsigned char *) "alnum",
+				 (const unsigned char *) "_",
+				 token->type == OP_NOTWORD, err);
+      if (BE (*err != REG_NOERROR && tree == NULL, 0))
+	return NULL;
+      break;
+    case OP_SPACE:
+    case OP_NOTSPACE:
+      tree = build_charclass_op (dfa, regexp->trans,
+				 (const unsigned char *) "space",
+				 (const unsigned char *) "",
+				 token->type == OP_NOTSPACE, err);
+      if (BE (*err != REG_NOERROR && tree == NULL, 0))
+	return NULL;
+      break;
+    case OP_ALT:
+    case END_OF_RE:
+      return NULL; /* Empty expression; *ERR is not touched.  */
+    case BACK_SLASH:
+      *err = REG_EESCAPE;
+      return NULL;
+    default:
+      /* Must not happen?  */
+#ifdef DEBUG
+      assert (0);
+#endif
+      return NULL;
+    }
+  fetch_token (token, regexp, syntax);
+
+  /* Apply every repetition operator that directly follows the atom.  */
+  while (token->type == OP_DUP_ASTERISK || token->type == OP_DUP_PLUS
+	 || token->type == OP_DUP_QUESTION || token->type == OP_OPEN_DUP_NUM)
+    {
+      tree = parse_dup_op (tree, regexp, dfa, token, syntax, err);
+      if (BE (*err != REG_NOERROR && tree == NULL, 0))
+	return NULL;
+      /* In BRE consecutive duplications are not allowed.  */
+      if ((syntax & RE_CONTEXT_INVALID_DUP)
+	  && (token->type == OP_DUP_ASTERISK
+	      || token->type == OP_OPEN_DUP_NUM))
+	{
+	  *err = REG_BADRPT;
+	  return NULL;
+	}
+    }
+  return tree;
+}
+
+/* Parse a parenthesized subexpression; TOKEN is at the opening parenthesis.
+   Build the following tree from regular expression (<reg_exp>):
+	 SUBEXP
+	    |
+	<reg_exp>
+   The SUBEXP node's opr.idx is the value of preg->re_nsub on entry.  */
+
+static bin_tree_t *
+parse_sub_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
+	       reg_syntax_t syntax, int nest, reg_errcode_t *err)
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  bin_tree_t *tree;
+  size_t cur_nsub;
+  cur_nsub = preg->re_nsub++;
+
+  fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE);
+
+  /* The subexpression may be a null string.  */
+  if (token->type == OP_CLOSE_SUBEXP)
+    tree = NULL;
+  else
+    {
+      tree = parse_reg_exp (regexp, preg, token, syntax, nest, err);
+      if (BE (*err == REG_NOERROR && token->type != OP_CLOSE_SUBEXP, 0))
+        *err = REG_EPAREN; /* The closing parenthesis is missing.  */
+      if (BE (*err != REG_NOERROR, 0))
+	return NULL;
+    }
+
+  if (cur_nsub <= '9' - '1') /* Only indices 0..8 fit the bkref map.  */
+    dfa->completed_bkref_map |= 1 << cur_nsub;
+
+  tree = create_tree (dfa, tree, NULL, SUBEXP);
+  if (BE (tree == NULL, 0))
+    {
+      *err = REG_ESPACE;
+      return NULL;
+    }
+  tree->token.opr.idx = cur_nsub;
+  return tree;
+}
+
+/* Parse the repetition operator at TOKEN ("*", "+", "?" or an interval such
+   as "{1,3}") and apply it to ELEM.  Return the resulting tree.  NULL is
+   returned with *ERR set on failure, and without an error when ELEM is NULL
+   or the interval is "{0}"/"{0,0}".  */
+
+static bin_tree_t *
+parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
+	      re_token_t *token, reg_syntax_t syntax, reg_errcode_t *err)
+{
+  bin_tree_t *tree = NULL, *old_tree = NULL;
+  int i, start, end, start_idx = re_string_cur_idx (regexp);
+  re_token_t start_token = *token;
+
+  if (token->type == OP_OPEN_DUP_NUM)
+    {
+      end = 0;
+      start = fetch_number (regexp, token, syntax);
+      if (start == -1)
+	{
+	  if (token->type == CHARACTER && token->opr.c == ',')
+	    start = 0; /* We treat "{,m}" as "{0,m}".  */
+	  else
+	    {
+	      *err = REG_BADBR; /* <re>{} is invalid.  */
+	      return NULL;
+	    }
+	}
+      if (BE (start != -2, 1))
+	{
+	  /* We treat "{n}" as "{n,n}".  */
+	  end = ((token->type == OP_CLOSE_DUP_NUM) ? start
+		 : ((token->type == CHARACTER && token->opr.c == ',')
+		    ? fetch_number (regexp, token, syntax) : -2));
+	}
+      if (BE (start == -2 || end == -2, 0))
+	{
+	  /* Invalid sequence.  */
+	  if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0))
+	    {
+	      *err = (token->type == END_OF_RE) ? REG_EBRACE : REG_BADBR;
+	      return NULL;
+	    }
+
+	  /* If the syntax bit is set, rollback.  */
+	  re_string_set_index (regexp, start_idx);
+	  *token = start_token;
+	  token->type = CHARACTER;
+	  /* mb_partial and word_char bits should be already initialized by
+	     peek_token.  */
+	  return elem;
+	}
+
+      if (BE (end != -1 && start > end, 0))
+	{
+	  /* First number greater than second.  */
+	  *err = REG_BADBR;
+	  return NULL;
+	}
+    }
+  else
+    {
+      start = (token->type == OP_DUP_PLUS) ? 1 : 0;
+      end = (token->type == OP_DUP_QUESTION) ? 1 : -1;
+    }
+
+  fetch_token (token, regexp, syntax);
+
+  if (BE (elem == NULL, 0))
+    return NULL;
+  if (BE (start == 0 && end == 0, 0))
+    {
+      postorder (elem, free_tree, NULL);
+      return NULL;
+    }
+
+  /* Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}".  */
+  if (BE (start > 0, 0))
+    {
+      tree = elem;
+      for (i = 2; i <= start; ++i)
+	{
+	  elem = duplicate_tree (elem, dfa);
+	  tree = create_tree (dfa, tree, elem, CONCAT);
+	  if (BE (elem == NULL || tree == NULL, 0))
+	    goto parse_dup_op_espace;
+	}
+
+      if (start == end)
+	return tree;
+
+      /* Duplicate ELEM before it is marked optional.  */
+      elem = duplicate_tree (elem, dfa);
+      /* ELEM is dereferenced below, so a failed copy must stop here.  */
+      if (BE (elem == NULL, 0))
+	goto parse_dup_op_espace;
+      old_tree = tree;
+    }
+
+  if (elem->token.type == SUBEXP)
+    postorder (elem, mark_opt_subexp, (void *) (long) elem->token.opr.idx);
+
+  tree = create_tree (dfa, elem, NULL, (end == -1 ? OP_DUP_ASTERISK : OP_ALT));
+  if (BE (tree == NULL, 0))
+    goto parse_dup_op_espace;
+
+  /* This loop is actually executed only when end != -1,
+     to rewrite <re>{0,n} as (<re>(<re>...<re>?)?)?...  We have
+     already created the start+1-th copy.  */
+  for (i = start + 2; i <= end; ++i)
+    {
+      elem = duplicate_tree (elem, dfa);
+      tree = create_tree (dfa, tree, elem, CONCAT);
+      if (BE (elem == NULL || tree == NULL, 0))
+        goto parse_dup_op_espace;
+
+      tree = create_tree (dfa, tree, NULL, OP_ALT);
+      if (BE (tree == NULL, 0))
+        goto parse_dup_op_espace;
+    }
+
+  if (old_tree)
+    tree = create_tree (dfa, old_tree, tree, CONCAT);
+  if (BE (tree == NULL, 0)) /* The final CONCAT may have failed.  */
+    goto parse_dup_op_espace;
+  return tree;
+ parse_dup_op_espace:
+  *err = REG_ESPACE;
+  return NULL;
+}
+
+/* Size reserved for the name of a collating symbol, equivalence class, or
+   character class.  NOTE(review): the author was unsure 32 always suffices.  */
+#define BRACKET_NAME_BUF_SIZE 32
+
+#ifndef _LIBC
+/* Local function for parse_bracket_exp, used only when not building in libc.
+   Build the range expression which starts at START_ELEM and ends at
+   END_ELEM.  The result is written to MBCSET and SBCSET.  RANGE_ALLOC
+   points to the allocated size of mbcset->range_starts and
+   mbcset->range_ends; it is a pointer since the arrays may be grown here.
+   Return REG_NOERROR, REG_ERANGE, REG_ECOLLATE or REG_ESPACE.  */
+
+static reg_errcode_t
+internal_function
+# ifdef RE_ENABLE_I18N
+build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc,
+		 bracket_elem_t *start_elem, bracket_elem_t *end_elem)
+# else /* not RE_ENABLE_I18N */
+build_range_exp (bitset_t sbcset, bracket_elem_t *start_elem,
+		 bracket_elem_t *end_elem)
+# endif /* not RE_ENABLE_I18N */
+{
+  unsigned int start_ch, end_ch;
+  /* Equivalence Classes and Character Classes can't be a range start/end.  */
+  if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
+	  || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,
+	  0))
+    return REG_ERANGE;
+
+  /* We can handle no multi character collating elements without libc
+     support.  */
+  if (BE ((start_elem->type == COLL_SYM
+	   && strlen ((char *) start_elem->opr.name) > 1)
+	  || (end_elem->type == COLL_SYM
+	      && strlen ((char *) end_elem->opr.name) > 1), 0))
+    return REG_ECOLLATE;
+
+# ifdef RE_ENABLE_I18N
+  {
+    wchar_t wc;
+    wint_t start_wc;
+    wint_t end_wc;
+    wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
+
+    start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch
+		: ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
+		   : 0));
+    end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch
+	      : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
+		 : 0));
+    start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM)
+		? __btowc (start_ch) : start_elem->opr.wch);
+    end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM)
+	      ? __btowc (end_ch) : end_elem->opr.wch);
+    if (start_wc == WEOF || end_wc == WEOF)
+      return REG_ECOLLATE;
+    cmp_buf[0] = start_wc;
+    cmp_buf[4] = end_wc;
+    if (wcscoll (cmp_buf, cmp_buf + 4) > 0)
+      return REG_ERANGE;
+
+    /* Got valid collation sequence values, add them as a new entry.
+       However, for !_LIBC we have no collation elements: if the
+       character set is single byte, the single byte character set
+       that we build below suffices.  parse_bracket_exp passes
+       no MBCSET if dfa->mb_cur_max == 1.  */
+    if (mbcset)
+      {
+        /* Check the space of the arrays.  */
+        if (BE (*range_alloc == mbcset->nranges, 0))
+          {
+	    /* There is not enough space, need realloc.  */
+	    wchar_t *new_array_start, *new_array_end;
+	    int new_nranges;
+
+	    /* +1 in case of mbcset->nranges is 0.  */
+	    new_nranges = 2 * mbcset->nranges + 1;
+	    /* realloc also handles the initially NULL arrays.  Store each new
+	       pointer at once so a later failure leaves no stale pointer.  */
+	    new_array_start = re_realloc (mbcset->range_starts, wchar_t,
+					  new_nranges);
+	    if (BE (new_array_start == NULL, 0))
+	      return REG_ESPACE;
+	    mbcset->range_starts = new_array_start;
+	    new_array_end = re_realloc (mbcset->range_ends, wchar_t,
+					new_nranges);
+	    if (BE (new_array_end == NULL, 0))
+	      return REG_ESPACE;
+	    mbcset->range_ends = new_array_end;
+	    *range_alloc = new_nranges;
+          }
+
+        mbcset->range_starts[mbcset->nranges] = start_wc;
+        mbcset->range_ends[mbcset->nranges++] = end_wc;
+      }
+
+    /* Build the table for single byte characters.  */
+    for (wc = 0; wc < SBC_MAX; ++wc)
+      {
+	cmp_buf[2] = wc;
+	if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
+	    && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
+	  bitset_set (sbcset, wc);
+      }
+  }
+# else /* not RE_ENABLE_I18N */
+  {
+    unsigned int ch;
+    start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch
+		: ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
+		   : 0));
+    end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch
+	      : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
+		 : 0));
+    if (start_ch > end_ch)
+      return REG_ERANGE;
+    /* Build the table for single byte characters.  */
+    for (ch = 0; ch < SBC_MAX; ++ch)
+      if (start_ch <= ch  && ch <= end_ch)
+	bitset_set (sbcset, ch);
+  }
+# endif /* not RE_ENABLE_I18N */
+  return REG_NOERROR;
+}
+#endif /* not _LIBC */
+
+#ifndef _LIBC
+/* Collating-symbol helper for parse_bracket_exp, compiled only when
+   _LIBC is not defined.  NAME is accepted only when it is exactly one
+   byte long; that byte is then added to SBCSET.  Any other length
+   yields REG_ECOLLATE.  In the RE_ENABLE_I18N signature MBCSET and
+   COLL_SYM_ALLOC are accepted but left untouched by this variant.  */
+
+static reg_errcode_t
+internal_function
+# ifdef RE_ENABLE_I18N
+build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
+			int *coll_sym_alloc, const unsigned char *name)
+# else /* not RE_ENABLE_I18N */
+build_collating_symbol (bitset_t sbcset, const unsigned char *name)
+# endif /* not RE_ENABLE_I18N */
+{
+  /* Guard clause: only single-byte names are supported here.  */
+  if (BE (strlen ((const char *) name) != 1, 0))
+    return REG_ECOLLATE;
+
+  /* Record the lone byte in the single-byte set.  */
+  bitset_set (sbcset, name[0]);
+
+  return REG_NOERROR;
+}
+#endif /* not _LIBC */
+
+/* Parse a bracket expression such as "[abc]", "[a-c]" or "[[.a-a.]]"
+   from REGEXP, using TOKEN as the look-ahead token.  Return the new
+   tree, or NULL after storing the error code in *ERR.  */
+static bin_tree_t *
+parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
+		   reg_syntax_t syntax, reg_errcode_t *err)
+{
+#ifdef _LIBC
+  const unsigned char *collseqmb;
+  const char *collseqwc;
+  uint32_t nrules;
+  int32_t table_size;
+  const int32_t *symb_table;
+  const unsigned char *extra;
+
+  /* Local function for parse_bracket_exp used in _LIBC environment.
+     Seek the collating symbol entry corresponding to NAME.
+     Return the index of the symbol in the SYMB_TABLE.  */
+
+  auto inline int32_t
+  __attribute ((always_inline))
+  seek_collating_symbol_entry (name, name_len)
+	 const unsigned char *name;
+	 size_t name_len;
+    {
+      int32_t hash = elem_hash ((const char *) name, name_len);
+      int32_t elem = hash % table_size;
+      if (symb_table[2 * elem] != 0)
+	{
+	  int32_t second = hash % (table_size - 2) + 1;
+
+	  do
+	    {
+	      /* First compare the hashing value.  */
+	      if (symb_table[2 * elem] == hash
+		  /* Compare the length of the name.  */
+		  && name_len == extra[symb_table[2 * elem + 1]]
+		  /* Compare the name.  */
+		  && memcmp (name, &extra[symb_table[2 * elem + 1] + 1],
+			     name_len) == 0)
+		{
+		  /* Yep, this is the entry.  */
+		  break;
+		}
+
+	      /* Next entry.  */
+	      elem += second;
+	    }
+	  while (symb_table[2 * elem] != 0);
+	}
+      return elem;
+    }
+
+  /* Local function for parse_bracket_exp used in _LIBC environment.
+     Look up the collation sequence value of BR_ELEM.
+     Return the value if succeeded, UINT_MAX otherwise.  */
+
+  auto inline unsigned int
+  __attribute ((always_inline))
+  lookup_collation_sequence_value (br_elem)
+	 bracket_elem_t *br_elem;
+    {
+      if (br_elem->type == SB_CHAR)
+	{
+	  if (nrules == 0)
+	    return collseqmb[br_elem->opr.ch];
+	  else
+	    {
+	      wint_t wc = __btowc (br_elem->opr.ch);
+	      return __collseq_table_lookup (collseqwc, wc);
+	    }
+	}
+      else if (br_elem->type == MB_CHAR)
+	{
+	  return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
+	}
+      else if (br_elem->type == COLL_SYM)
+	{
+	  size_t sym_name_len = strlen ((char *) br_elem->opr.name);
+	  if (nrules != 0)
+	    {
+	      int32_t elem, idx;
+	      elem = seek_collating_symbol_entry (br_elem->opr.name,
+						  sym_name_len);
+	      if (symb_table[2 * elem] != 0)
+		{
+		  /* We found the entry.  */
+		  idx = symb_table[2 * elem + 1];
+		  /* Skip the name of collating element name.  */
+		  idx += 1 + extra[idx];
+		  /* Skip the byte sequence of the collating element.  */
+		  idx += 1 + extra[idx];
+		  /* Adjust for the alignment.  */
+		  idx = (idx + 3) & ~3;
+		  /* Skip the multibyte collation sequence value.  */
+		  idx += sizeof (unsigned int);
+		  /* Skip the wide char sequence of the collating element.  */
+		  idx += sizeof (unsigned int) *
+		    (1 + *(unsigned int *) (extra + idx));
+		  /* Return the collation sequence value.  */
+		  return *(unsigned int *) (extra + idx);
+		}
+	      else if (symb_table[2 * elem] == 0 && sym_name_len == 1)
+		{
+		  /* No valid character.  Match it as a single byte
+		     character.  */
+		  return collseqmb[br_elem->opr.name[0]];
+		}
+	    }
+	  else if (sym_name_len == 1)
+	    return collseqmb[br_elem->opr.name[0]];
+	}
+      return UINT_MAX;
+    }
+
+  /* Local function for parse_bracket_exp used in _LIBC environment.
+     Build the range expression which starts from START_ELEM, and ends
+     at END_ELEM.  The results are written to MBCSET and SBCSET.
+     RANGE_ALLOC is the allocated size of mbcset->range_starts, and
+     mbcset->range_ends; it is a pointer argument since we may
+     update it.  */
+
+  auto inline reg_errcode_t
+  __attribute ((always_inline))
+  build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem)
+	 re_charset_t *mbcset;
+	 int *range_alloc;
+	 bitset_t sbcset;
+	 bracket_elem_t *start_elem, *end_elem;
+    {
+      unsigned int ch;
+      uint32_t start_collseq;
+      uint32_t end_collseq;
+
+      /* Equivalence Classes and Character Classes can't be a range
+	 start/end.  */
+      if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
+	      || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,
+	      0))
+	return REG_ERANGE;
+
+      start_collseq = lookup_collation_sequence_value (start_elem);
+      end_collseq = lookup_collation_sequence_value (end_elem);
+      /* Check start/end collation sequence values.  */
+      if (BE (start_collseq == UINT_MAX || end_collseq == UINT_MAX, 0))
+	return REG_ECOLLATE;
+      if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0))
+	return REG_ERANGE;
+
+      /* Got valid collation sequence values, add them as a new entry.
+	 However, if we have no collation elements, and the character set
+	 is single byte, the single byte character set that we
+	 build below suffices. */
+      if (nrules > 0 || dfa->mb_cur_max > 1)
+	{
+	  /* Check the space of the arrays.  */
+	  if (BE (*range_alloc == mbcset->nranges, 0))
+	    {
+	      /* There is not enough space, need realloc.  */
+	      uint32_t *new_array_start;
+	      uint32_t *new_array_end;
+	      int new_nranges;
+
+	      /* +1 in case of mbcset->nranges is 0.  */
+	      new_nranges = 2 * mbcset->nranges + 1;
+	      new_array_start = re_realloc (mbcset->range_starts, uint32_t,
+					    new_nranges);
+	      new_array_end = re_realloc (mbcset->range_ends, uint32_t,
+					  new_nranges);
+
+	      if (BE (new_array_start == NULL || new_array_end == NULL, 0))
+		return REG_ESPACE;
+
+	      mbcset->range_starts = new_array_start;
+	      mbcset->range_ends = new_array_end;
+	      *range_alloc = new_nranges;
+	    }
+
+	  mbcset->range_starts[mbcset->nranges] = start_collseq;
+	  mbcset->range_ends[mbcset->nranges++] = end_collseq;
+	}
+
+      /* Build the table for single byte characters.  */
+      for (ch = 0; ch < SBC_MAX; ch++)
+	{
+	  uint32_t ch_collseq;
+	  /*
+	  if (MB_CUR_MAX == 1)
+	  */
+	  if (nrules == 0)
+	    ch_collseq = collseqmb[ch];
+	  else
+	    ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
+	  if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
+	    bitset_set (sbcset, ch);
+	}
+      return REG_NOERROR;
+    }
+
+  /* Local function for parse_bracket_exp used in _LIBC environment.
+     Build the collating element which is represented by NAME.
+     The results are written to MBCSET and SBCSET.
+     COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym; it is a
+     pointer argument since we may update it.  */
+
+  auto inline reg_errcode_t
+  __attribute ((always_inline))
+  build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name)
+	 re_charset_t *mbcset;
+	 int *coll_sym_alloc;
+	 bitset_t sbcset;
+	 const unsigned char *name;
+    {
+      int32_t elem, idx;
+      size_t name_len = strlen ((const char *) name);
+      if (nrules != 0)
+	{
+	  elem = seek_collating_symbol_entry (name, name_len);
+	  if (symb_table[2 * elem] != 0)
+	    {
+	      /* We found the entry.  */
+	      idx = symb_table[2 * elem + 1];
+	      /* Skip the name of collating element name.  */
+	      idx += 1 + extra[idx];
+	    }
+	  else if (symb_table[2 * elem] == 0 && name_len == 1)
+	    {
+	      /* No valid character, treat it as a normal
+		 character.  */
+	      bitset_set (sbcset, name[0]);
+	      return REG_NOERROR;
+	    }
+	  else
+	    return REG_ECOLLATE;
+
+	  /* Got valid collation sequence, add it as a new entry.  */
+	  /* Check the space of the arrays.  */
+	  if (BE (*coll_sym_alloc == mbcset->ncoll_syms, 0))
+	    {
+	      /* Not enough, realloc it.  */
+	      /* +1 in case of mbcset->ncoll_syms is 0.  */
+	      int new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
+	      /* Use realloc since mbcset->coll_syms is NULL
+		 if *alloc == 0.  */
+	      int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
+						   new_coll_sym_alloc);
+	      if (BE (new_coll_syms == NULL, 0))
+		return REG_ESPACE;
+	      mbcset->coll_syms = new_coll_syms;
+	      *coll_sym_alloc = new_coll_sym_alloc;
+	    }
+	  mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
+	  return REG_NOERROR;
+	}
+      else
+	{
+	  if (BE (name_len != 1, 0))
+	    return REG_ECOLLATE;
+	  else
+	    {
+	      bitset_set (sbcset, name[0]);
+	      return REG_NOERROR;
+	    }
+	}
+    }
+#endif
+
+  re_token_t br_token;
+  re_bitset_ptr_t sbcset;
+#ifdef RE_ENABLE_I18N
+  re_charset_t *mbcset;
+  int coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0;
+  int equiv_class_alloc = 0, char_class_alloc = 0;
+#endif /* RE_ENABLE_I18N */
+  int non_match = 0;
+  bin_tree_t *work_tree;
+  int token_len;
+  int first_round = 1;
+#ifdef _LIBC
+  collseqmb = (const unsigned char *)
+    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
+  nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+  if (nrules)
+    {
+      collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
+      table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB);
+      symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE,
+						  _NL_COLLATE_SYMB_TABLEMB);
+      extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
+						   _NL_COLLATE_SYMB_EXTRAMB);
+    }
+#endif
+  sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
+#ifdef RE_ENABLE_I18N
+  mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
+  if (BE (sbcset == NULL || mbcset == NULL, 0))
+#else
+  if (BE (sbcset == NULL, 0))
+#endif /* RE_ENABLE_I18N */
+    {
+      /* Release whichever allocation did succeed.  */
+      re_free (sbcset);
+#ifdef RE_ENABLE_I18N
+      re_free (mbcset);
+#endif /* RE_ENABLE_I18N */
+      *err = REG_ESPACE;
+      return NULL;
+    }
+
+  token_len = peek_token_bracket (token, regexp, syntax);
+  if (BE (token->type == END_OF_RE, 0))
+    {
+      *err = REG_BADPAT;
+      goto parse_bracket_exp_free_return;
+    }
+  if (token->type == OP_NON_MATCH_LIST)
+    {
+#ifdef RE_ENABLE_I18N
+      mbcset->non_match = 1;
+#endif /* RE_ENABLE_I18N */
+      non_match = 1;
+      if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
+	bitset_set (sbcset, '\0');
+      re_string_skip_bytes (regexp, token_len); /* Skip a token.  */
+      token_len = peek_token_bracket (token, regexp, syntax);
+      if (BE (token->type == END_OF_RE, 0))
+	{
+	  *err = REG_BADPAT;
+	  goto parse_bracket_exp_free_return;
+	}
+    }
+
+  /* We treat the first ']' as a normal character.  */
+  if (token->type == OP_CLOSE_BRACKET)
+    token->type = CHARACTER;
+
+  /* Consume one element (or one range) per iteration until ']'.  */
+  while (1)
+    {
+      bracket_elem_t start_elem, end_elem;
+      unsigned char start_name_buf[BRACKET_NAME_BUF_SIZE];
+      unsigned char end_name_buf[BRACKET_NAME_BUF_SIZE];
+      reg_errcode_t ret;
+      int token_len2 = 0, is_range_exp = 0;
+      re_token_t token2;
+
+      start_elem.opr.name = start_name_buf;
+      ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa,
+				   syntax, first_round);
+      if (BE (ret != REG_NOERROR, 0))
+	{
+	  *err = ret;
+	  goto parse_bracket_exp_free_return;
+	}
+      first_round = 0;
+
+      /* Get information about the next token.  We need it in any case.  */
+      token_len = peek_token_bracket (token, regexp, syntax);
+
+      /* Do not check for ranges if we know they are not allowed.  */
+      if (start_elem.type != CHAR_CLASS && start_elem.type != EQUIV_CLASS)
+	{
+	  if (BE (token->type == END_OF_RE, 0))
+	    {
+	      *err = REG_EBRACK;
+	      goto parse_bracket_exp_free_return;
+	    }
+	  if (token->type == OP_CHARSET_RANGE)
+	    {
+	      re_string_skip_bytes (regexp, token_len); /* Skip '-'.  */
+	      token_len2 = peek_token_bracket (&token2, regexp, syntax);
+	      if (BE (token2.type == END_OF_RE, 0))
+		{
+		  *err = REG_EBRACK;
+		  goto parse_bracket_exp_free_return;
+		}
+	      if (token2.type == OP_CLOSE_BRACKET)
+		{
+		  /* We treat the last '-' as a normal character.  */
+		  re_string_skip_bytes (regexp, -token_len);
+		  token->type = CHARACTER;
+		}
+	      else
+		is_range_exp = 1;
+	    }
+	}
+
+      if (is_range_exp == 1)
+	{
+	  end_elem.opr.name = end_name_buf;
+	  ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2,
+				       dfa, syntax, 1);
+	  if (BE (ret != REG_NOERROR, 0))
+	    {
+	      *err = ret;
+	      goto parse_bracket_exp_free_return;
+	    }
+
+	  token_len = peek_token_bracket (token, regexp, syntax);
+
+#ifdef _LIBC
+	  *err = build_range_exp (sbcset, mbcset, &range_alloc,
+				  &start_elem, &end_elem);
+#else
+# ifdef RE_ENABLE_I18N
+	  *err = build_range_exp (sbcset,
+				  dfa->mb_cur_max > 1 ? mbcset : NULL,
+				  &range_alloc, &start_elem, &end_elem);
+# else
+	  *err = build_range_exp (sbcset, &start_elem, &end_elem);
+# endif
+#endif /* not _LIBC */
+	  if (BE (*err != REG_NOERROR, 0))
+	    goto parse_bracket_exp_free_return;
+	}
+      else
+	{
+	  switch (start_elem.type)
+	    {
+	    case SB_CHAR:
+	      bitset_set (sbcset, start_elem.opr.ch);
+	      break;
+#ifdef RE_ENABLE_I18N
+	    case MB_CHAR:
+	      /* Check whether the array has enough space.  */
+	      if (BE (mbchar_alloc == mbcset->nmbchars, 0))
+		{
+		  wchar_t *new_mbchars;
+		  /* Not enough, realloc it.  */
+		  /* +1 in case of mbcset->nmbchars is 0.  */
+		  mbchar_alloc = 2 * mbcset->nmbchars + 1;
+		  /* Use realloc since array is NULL if *alloc == 0.  */
+		  new_mbchars = re_realloc (mbcset->mbchars, wchar_t,
+					    mbchar_alloc);
+		  if (BE (new_mbchars == NULL, 0))
+		    goto parse_bracket_exp_espace;
+		  mbcset->mbchars = new_mbchars;
+		}
+	      mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch;
+	      break;
+#endif /* RE_ENABLE_I18N */
+	    case EQUIV_CLASS:
+	      *err = build_equiv_class (sbcset,
+#ifdef RE_ENABLE_I18N
+					mbcset, &equiv_class_alloc,
+#endif /* RE_ENABLE_I18N */
+					start_elem.opr.name);
+	      if (BE (*err != REG_NOERROR, 0))
+		goto parse_bracket_exp_free_return;
+	      break;
+	    case COLL_SYM:
+	      *err = build_collating_symbol (sbcset,
+#ifdef RE_ENABLE_I18N
+					     mbcset, &coll_sym_alloc,
+#endif /* RE_ENABLE_I18N */
+					     start_elem.opr.name);
+	      if (BE (*err != REG_NOERROR, 0))
+		goto parse_bracket_exp_free_return;
+	      break;
+	    case CHAR_CLASS:
+	      *err = build_charclass (regexp->trans, sbcset,
+#ifdef RE_ENABLE_I18N
+				      mbcset, &char_class_alloc,
+#endif /* RE_ENABLE_I18N */
+				      start_elem.opr.name, syntax);
+	      if (BE (*err != REG_NOERROR, 0))
+	       goto parse_bracket_exp_free_return;
+	      break;
+	    default:
+	      assert (0);
+	      break;
+	    }
+	}
+      if (BE (token->type == END_OF_RE, 0))
+	{
+	  *err = REG_EBRACK;
+	  goto parse_bracket_exp_free_return;
+	}
+      if (token->type == OP_CLOSE_BRACKET)
+	break;
+    }
+
+  re_string_skip_bytes (regexp, token_len); /* Skip a token.  */
+
+  /* If it is non-matching list.  */
+  if (non_match)
+    bitset_not (sbcset);
+
+#ifdef RE_ENABLE_I18N
+  /* Ensure only single byte characters are set.  */
+  if (dfa->mb_cur_max > 1)
+    bitset_mask (sbcset, dfa->sb_char);
+
+  if (mbcset->nmbchars || mbcset->ncoll_syms || mbcset->nequiv_classes
+      || mbcset->nranges || (dfa->mb_cur_max > 1 && (mbcset->nchar_classes
+						     || mbcset->non_match)))
+    {
+      bin_tree_t *mbc_tree;
+      int sbc_idx;
+      /* Build a tree for complex bracket.  */
+      dfa->has_mb_node = 1;
+      br_token.type = COMPLEX_BRACKET;
+      br_token.opr.mbcset = mbcset;
+      mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
+      if (BE (mbc_tree == NULL, 0))
+	goto parse_bracket_exp_espace;
+      for (sbc_idx = 0; sbc_idx < BITSET_WORDS; ++sbc_idx)
+	if (sbcset[sbc_idx])
+	  break;
+      /* If there are no bits set in sbcset, there is no point
+	 of having both SIMPLE_BRACKET and COMPLEX_BRACKET.  */
+      if (sbc_idx < BITSET_WORDS)
+	{
+	  /* Build a tree for simple bracket.  */
+	  br_token.type = SIMPLE_BRACKET;
+	  br_token.opr.sbcset = sbcset;
+	  work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
+	  if (BE (work_tree == NULL, 0))
+	    goto parse_bracket_exp_espace;
+
+	  /* Then join them by ALT node.  */
+	  work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT);
+	  if (BE (work_tree == NULL, 0))
+	    goto parse_bracket_exp_espace;
+	}
+      else
+	{
+	  re_free (sbcset);
+	  work_tree = mbc_tree;
+	}
+    }
+  else
+#endif /* RE_ENABLE_I18N */
+    {
+      /* Build a tree for simple bracket.  */
+      br_token.type = SIMPLE_BRACKET;
+      br_token.opr.sbcset = sbcset;
+      work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
+      if (BE (work_tree == NULL, 0))
+	goto parse_bracket_exp_espace;
+#ifdef RE_ENABLE_I18N
+      /* MBCSET holds nothing that is needed.  Free it only now: the
+	 failure path above ends in cleanup code that frees it too.  */
+      free_charset (mbcset);
+#endif
+    }
+  return work_tree;
+
+ parse_bracket_exp_espace:
+  *err = REG_ESPACE;
+ parse_bracket_exp_free_return:
+  re_free (sbcset);
+#ifdef RE_ENABLE_I18N
+  free_charset (mbcset);
+#endif /* RE_ENABLE_I18N */
+  return NULL;
+}
+
+/* Parse one element of a bracket expression into ELEM.  TOKEN, which
+   is TOKEN_LEN bytes long, is the token peeked at the current position.  */
+static reg_errcode_t
+parse_bracket_element (bracket_elem_t *elem, re_string_t *regexp,
+		       re_token_t *token, int token_len, re_dfa_t *dfa,
+		       reg_syntax_t syntax, int accept_hyphen)
+{
+#ifdef RE_ENABLE_I18N
+  int mb_len = re_string_char_size_at (regexp, re_string_cur_idx (regexp));
+  if (mb_len > 1)
+    {
+      elem->opr.wch = re_string_wchar_at (regexp, re_string_cur_idx (regexp));
+      elem->type = MB_CHAR;
+      re_string_skip_bytes (regexp, mb_len);
+      return REG_NOERROR;
+    }
+#endif /* RE_ENABLE_I18N */
+  re_string_skip_bytes (regexp, token_len); /* Consume the token.  */
+  if (token->type == OP_OPEN_EQUIV_CLASS || token->type == OP_OPEN_COLL_ELEM
+      || token->type == OP_OPEN_CHAR_CLASS)
+    return parse_bracket_symbol (elem, regexp, token);
+
+  if (!accept_hyphen && BE (token->type == OP_CHARSET_RANGE, 0))
+    {
+      /* Outside of a range, a '-' is only tolerated when it directly
+	 precedes the closing bracket.  Everything else is an error.  */
+      re_token_t lookahead;
+      (void) peek_token_bracket (&lookahead, regexp, syntax);
+      if (lookahead.type != OP_CLOSE_BRACKET)
+	/* No error value is standardized here; ERANGE makes good sense.  */
+	return REG_ERANGE;
+    }
+
+  elem->type = SB_CHAR;
+  elem->opr.ch = token->opr.c;
+  return REG_NOERROR;
+}
+
+/* Read the body of a bracket symbol -- [:<character_class>:],
+   [.<collating_element>.] or [=<equivalent_class>=] -- from REGEXP into
+   ELEM->opr.name and set ELEM->type according to TOKEN->type.  */
+
+static reg_errcode_t
+parse_bracket_symbol (bracket_elem_t *elem, re_string_t *regexp,
+		      re_token_t *token)
+{
+  unsigned char closer = token->opr.c;
+  unsigned char byte;
+  int len = 0;
+
+  if (re_string_eoi(regexp))
+    return REG_EBRACK;
+  while (1)
+    {
+      /* The name and its terminating NUL must fit in the buffer.  */
+      if (len >= BRACKET_NAME_BUF_SIZE)
+	return REG_EBRACK;
+      if (token->type != OP_OPEN_CHAR_CLASS)
+	byte = re_string_fetch_byte (regexp);
+      else
+	byte = re_string_fetch_byte_case (regexp);
+      if (re_string_eoi(regexp))
+	return REG_EBRACK;
+      /* The symbol ends at the delimiter immediately followed by ']'.  */
+      if (byte == closer && re_string_peek_byte (regexp, 0) == ']')
+	break;
+      elem->opr.name[len++] = byte;
+    }
+
+  elem->opr.name[len] = '\0';
+  re_string_skip_bytes (regexp, 1);	/* Step over the ']'.  */
+
+  if (token->type == OP_OPEN_COLL_ELEM)
+    elem->type = COLL_SYM;
+  else if (token->type == OP_OPEN_EQUIV_CLASS)
+    elem->type = EQUIV_CLASS;
+  else if (token->type == OP_OPEN_CHAR_CLASS)
+    elem->type = CHAR_CLASS;
+  /* Any other token type leaves ELEM->type as it was.  */
+
+  return REG_NOERROR;
+}
+
+  /* Helper function for parse_bracket_exp.
+     Build the equivalence class which is represented by NAME.
+     The results are written to MBCSET and SBCSET.
+     EQUIV_CLASS_ALLOC is the allocated size of mbcset->equiv_classes;
+     it is a pointer argument since we may update it.
+     Returns REG_NOERROR on success, else REG_ECOLLATE or REG_ESPACE.  */
+static reg_errcode_t
+#ifdef RE_ENABLE_I18N
+build_equiv_class (bitset_t sbcset, re_charset_t *mbcset,
+		   int *equiv_class_alloc, const unsigned char *name)
+#else /* not RE_ENABLE_I18N */
+build_equiv_class (bitset_t sbcset, const unsigned char *name)
+#endif /* not RE_ENABLE_I18N */
+{
+#ifdef _LIBC
+  uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+  if (nrules != 0)
+    {
+      const int32_t *table, *indirect;
+      const unsigned char *weights, *extra, *cp;
+      unsigned char char_buf[2];
+      int32_t idx1, idx2;
+      unsigned int ch;
+      size_t len;
+      /* This #include defines a local function!  */
+# include <locale/weight.h>
+      /* Calculate the index for equivalence class.  */
+      cp = name;
+      table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
+      weights = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
+					       _NL_COLLATE_WEIGHTMB);
+      extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
+						   _NL_COLLATE_EXTRAMB);
+      indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
+						_NL_COLLATE_INDIRECTMB);
+      idx1 = findidx (&cp);
+      if (BE (idx1 == 0 || cp < name + strlen ((const char *) name), 0))
+	/* This isn't a valid character.  */
+	return REG_ECOLLATE;
+
+      /* Build single byte matching table for this equivalence class.  */
+      char_buf[1] = (unsigned char) '\0';
+      len = weights[idx1];
+      for (ch = 0; ch < SBC_MAX; ++ch)
+	{
+	  char_buf[0] = ch;
+	  cp = char_buf;
+	  idx2 = findidx (&cp);
+	  /* NOTE(review): a direct "idx2 = table[ch]" lookup was left
+	     commented out here; findidx comes from <locale/weight.h>
+	     and is not visible in this file.  */
+	  if (idx2 == 0)
+	    /* This isn't a valid character.  */
+	    continue;
+	  if (len == weights[idx2])
+	    {
+	      int cnt = 0;
+	      while (cnt <= len &&
+		     weights[idx1 + 1 + cnt] == weights[idx2 + 1 + cnt])
+		++cnt;
+
+	      if (cnt > len)
+		bitset_set (sbcset, ch);
+	    }
+	}
+      /* Check whether the array has enough space.  */
+      if (BE (*equiv_class_alloc == mbcset->nequiv_classes, 0))
+	{
+	  /* Not enough, realloc it.  */
+	  /* +1 in case of mbcset->nequiv_classes is 0.  */
+	  int new_equiv_class_alloc = 2 * mbcset->nequiv_classes + 1;
+	  /* Use realloc since the array is NULL if *alloc == 0.  */
+	  int32_t *new_equiv_classes = re_realloc (mbcset->equiv_classes,
+						   int32_t,
+						   new_equiv_class_alloc);
+	  if (BE (new_equiv_classes == NULL, 0))
+	    return REG_ESPACE;
+	  mbcset->equiv_classes = new_equiv_classes;
+	  *equiv_class_alloc = new_equiv_class_alloc;
+	}
+      mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1;
+    }
+  else
+#endif /* _LIBC */
+    {
+      if (BE (strlen ((const char *) name) != 1, 0))
+	return REG_ECOLLATE;
+      bitset_set (sbcset, *name);
+    }
+  return REG_NOERROR;
+}
+
+  /* Helper function for parse_bracket_exp.
+     Add the character class called CLASS_NAME to the sets under
+     construction: matching single bytes go into SBCSET and, with
+     RE_ENABLE_I18N, the class itself is appended to MBCSET, whose
+     array capacity *CHAR_CLASS_ALLOC is grown here when needed.  */
+
+static reg_errcode_t
+#ifdef RE_ENABLE_I18N
+build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
+		 re_charset_t *mbcset, int *char_class_alloc,
+		 const unsigned char *class_name, reg_syntax_t syntax)
+#else /* not RE_ENABLE_I18N */
+build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
+		 const unsigned char *class_name, reg_syntax_t syntax)
+#endif /* not RE_ENABLE_I18N */
+{
+  const char *name = (const char *) class_name;
+  int ch;
+
+  /* Under RE_ICASE, "upper" and "lower" must each match letters of
+     either case, so both are treated as "alpha".  */
+  if ((syntax & RE_ICASE)
+      && (strcmp (name, "lower") == 0 || strcmp (name, "upper") == 0))
+    name = "alpha";
+
+#ifdef RE_ENABLE_I18N
+  /* Make sure there is a free slot in mbcset->char_classes.  */
+  if (BE (mbcset->nchar_classes == *char_class_alloc, 0))
+    {
+      /* The array is full: roughly double it.  The "+ 1" covers the
+	 initial case where mbcset->nchar_classes is still 0.  */
+      int grown_alloc = 2 * mbcset->nchar_classes + 1;
+      /* Use realloc since array is NULL if *alloc == 0.  */
+      wctype_t *grown = re_realloc (mbcset->char_classes, wctype_t,
+				    grown_alloc);
+      if (BE (grown == NULL, 0))
+	return REG_ESPACE;
+      *char_class_alloc = grown_alloc;
+      mbcset->char_classes = grown;
+    }
+  mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name);
+#endif /* RE_ENABLE_I18N */
+
+#define BUILD_CHARCLASS_LOOP(ctype_func)	\
+  do {						\
+    if (BE (trans == NULL, 1))			\
+      {						\
+	for (ch = 0; ch < SBC_MAX; ++ch)	\
+	  if (ctype_func (ch))			\
+	    bitset_set (sbcset, ch);		\
+      }						\
+    else					\
+      {						\
+	for (ch = 0; ch < SBC_MAX; ++ch)	\
+	  if (ctype_func (ch))			\
+	    bitset_set (sbcset, trans[ch]);	\
+      }						\
+  } while (0)
+
+  if (strcmp (name, "alnum") == 0)
+    BUILD_CHARCLASS_LOOP (isalnum);
+  else if (strcmp (name, "alpha") == 0)
+    BUILD_CHARCLASS_LOOP (isalpha);
+  else if (strcmp (name, "blank") == 0)
+    BUILD_CHARCLASS_LOOP (isblank);
+  else if (strcmp (name, "cntrl") == 0)
+    BUILD_CHARCLASS_LOOP (iscntrl);
+  else if (strcmp (name, "digit") == 0)
+    BUILD_CHARCLASS_LOOP (isdigit);
+  else if (strcmp (name, "graph") == 0)
+    BUILD_CHARCLASS_LOOP (isgraph);
+  else if (strcmp (name, "lower") == 0)
+    BUILD_CHARCLASS_LOOP (islower);
+  else if (strcmp (name, "print") == 0)
+    BUILD_CHARCLASS_LOOP (isprint);
+  else if (strcmp (name, "punct") == 0)
+    BUILD_CHARCLASS_LOOP (ispunct);
+  else if (strcmp (name, "space") == 0)
+    BUILD_CHARCLASS_LOOP (isspace);
+  else if (strcmp (name, "upper") == 0)
+    BUILD_CHARCLASS_LOOP (isupper);
+  else if (strcmp (name, "xdigit") == 0)
+    BUILD_CHARCLASS_LOOP (isxdigit);
+  else
+    return REG_ECTYPE;
+
+  return REG_NOERROR;
+}
+
+/* Build the tree for a character class operator: a bracket matching
+   class CLASS_NAME plus each byte of the string EXTRA, complemented if
+   NON_MATCH is nonzero.  Return NULL and set *ERR on failure.  */
+static bin_tree_t *
+build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
+		    const unsigned char *class_name,
+		    const unsigned char *extra, int non_match,
+		    reg_errcode_t *err)
+{
+  re_bitset_ptr_t sbcset;
+#ifdef RE_ENABLE_I18N
+  re_charset_t *mbcset;
+  int alloc = 0;
+#endif /* RE_ENABLE_I18N */
+  reg_errcode_t ret;
+  re_token_t br_token;
+  bin_tree_t *tree;
+
+  sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
+#ifdef RE_ENABLE_I18N
+  mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
+  if (BE (sbcset == NULL || mbcset == NULL, 0))
+#else /* not RE_ENABLE_I18N */
+  if (BE (sbcset == NULL, 0))
+#endif /* not RE_ENABLE_I18N */
+    {
+      /* Release whichever allocation did succeed.  */
+      re_free (sbcset);
+#ifdef RE_ENABLE_I18N
+      re_free (mbcset);
+#endif /* RE_ENABLE_I18N */
+      *err = REG_ESPACE;
+      return NULL;
+    }
+
+#ifdef RE_ENABLE_I18N
+  if (non_match)
+    mbcset->non_match = 1;
+#endif /* RE_ENABLE_I18N */
+
+  /* We don't care the syntax in this case.  */
+  ret = build_charclass (trans, sbcset,
+#ifdef RE_ENABLE_I18N
+			 mbcset, &alloc,
+#endif /* RE_ENABLE_I18N */
+			 class_name, 0);
+
+  if (BE (ret != REG_NOERROR, 0))
+    {
+      re_free (sbcset);
+#ifdef RE_ENABLE_I18N
+      free_charset (mbcset);
+#endif /* RE_ENABLE_I18N */
+      *err = ret;
+      return NULL;
+    }
+  /* \w match '_' also.  */
+  for (; *extra; extra++)
+    bitset_set (sbcset, *extra);
+
+  /* If it is non-matching list.  */
+  if (non_match)
+    bitset_not (sbcset);
+
+#ifdef RE_ENABLE_I18N
+  /* Ensure only single byte characters are set.  */
+  if (dfa->mb_cur_max > 1)
+    bitset_mask (sbcset, dfa->sb_char);
+#endif
+
+  /* Build a tree for simple bracket.  */
+  br_token.type = SIMPLE_BRACKET;
+  br_token.opr.sbcset = sbcset;
+  tree = create_token_tree (dfa, NULL, NULL, &br_token);
+  if (BE (tree == NULL, 0))
+    goto build_word_op_espace;
+
+#ifdef RE_ENABLE_I18N
+  if (dfa->mb_cur_max > 1)
+    {
+      bin_tree_t *mbc_tree;
+      /* Build a tree for complex bracket.  */
+      br_token.type = COMPLEX_BRACKET;
+      br_token.opr.mbcset = mbcset;
+      dfa->has_mb_node = 1;
+      mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
+      if (BE (mbc_tree == NULL, 0))
+	goto build_word_op_espace;
+      /* Then join them by ALT node.  */
+      tree = create_tree (dfa, tree, mbc_tree, OP_ALT);
+      /* If the join failed, fall out of this block into the cleanup.  */
+      if (BE (tree != NULL, 1))
+	return tree;
+    }
+  else
+    {
+      free_charset (mbcset);
+      return tree;
+    }
+#else /* not RE_ENABLE_I18N */
+  return tree;
+#endif /* not RE_ENABLE_I18N */
+
+ build_word_op_espace:
+  re_free (sbcset);
+#ifdef RE_ENABLE_I18N
+  free_charset (mbcset);
+#endif /* RE_ENABLE_I18N */
+  *err = REG_ESPACE;
+  return NULL;
+}
+
+/* Read the decimal count of an interval expression such as "a{1,3}"
+   from INPUT, one token at a time.  Return the number, -1 when the
+   field is empty as in "{,1}", or -2 when an error occurred.  */
+static int
+fetch_number (re_string_t *input, re_token_t *token, reg_syntax_t syntax)
+{
+  int value = -1;
+  unsigned char c;
+  for (;;)
+    {
+      fetch_token (token, input, syntax);
+      c = token->opr.c;
+      if (BE (token->type == END_OF_RE, 0))
+	return -2;
+      if (token->type == OP_CLOSE_DUP_NUM || c == ',')
+	break;
+      if (value == -2 || token->type != CHARACTER || c < '0' || '9' < c)
+	value = -2;
+      else
+	value = (value == -1 ? 0 : value * 10) + (c - '0');
+      value = (value > RE_DUP_MAX) ? -2 : value;
+    }
+  return value;
+}
+#ifdef RE_ENABLE_I18N
+/* Free CSET together with every array it owns.  */
+static void
+free_charset (re_charset_t *cset)
+{
+  re_free (cset->mbchars);
+# ifdef _LIBC
+  re_free (cset->coll_syms);
+  re_free (cset->equiv_classes);
+# endif
+  re_free (cset->range_starts);	/* Ranges are built without _LIBC too.  */
+  re_free (cset->range_ends);
+  re_free (cset->char_classes);
+  re_free (cset);
+}
+#endif /* RE_ENABLE_I18N */
+
+/* Functions for binary tree operation.  */
+
+/* Allocate a tree node of kind TYPE whose children are LEFT and RIGHT.  */
+
+static bin_tree_t *
+create_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
+	     re_token_type_t type)
+{
+  re_token_t tok;
+  tok.type = type;
+  return create_token_tree (dfa, left, right, &tok);
+}
+
+/* Carve a node out of DFA's tree storage, copy TOKEN into it and attach
+   LEFT and RIGHT as its children.  Return NULL if storage cannot grow.  */
+static bin_tree_t *
+create_token_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
+		   const re_token_t *token)
+{
+  bin_tree_t *node;
+  if (BE (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE, 0))
+    {
+      /* No free slot remains; push a fresh chunk on the storage list.  */
+      bin_tree_storage_t *chunk = re_malloc (bin_tree_storage_t, 1);
+      if (chunk == NULL)
+	return NULL;
+      chunk->next = dfa->str_tree_storage;
+      dfa->str_tree_storage_idx = 0;
+      dfa->str_tree_storage = chunk;
+    }
+  node = &dfa->str_tree_storage->data[dfa->str_tree_storage_idx++];
+  node->token = *token;
+  node->token.opt_subexp = 0;
+  node->token.duplicated = 0;
+  node->node_idx = -1;
+  node->parent = NULL;
+  node->first = NULL;
+  node->next = NULL;
+  node->left = left;
+  node->right = right;
+  if (right != NULL)
+    right->parent = node;
+  if (left != NULL)
+    left->parent = node;
+  return node;
+}
+
+/* Flag NODE as an optional subexpression when it is the SUBEXP node
+   whose index is carried in EXTRA.  For preorder or postorder walks.  */
+
+static reg_errcode_t
+mark_opt_subexp (void *extra, bin_tree_t *node)
+{
+  int wanted = (int) (long) extra;
+  if (node->token.type != SUBEXP || node->token.opr.idx != wanted)
+    return REG_NOERROR;
+  node->token.opt_subexp = 1;
+  return REG_NOERROR;
+}
+
+/* Free the bracket-expression data owned by the token NODE.  Tokens
+   marked as duplicated are left alone, as are all non-bracket tokens.  */
+
+static void
+free_token (re_token_t *node)
+{
+  /* Nothing is released for a token flagged as duplicated.  */
+  if (node->duplicated != 0)
+    return;
+
+#ifdef RE_ENABLE_I18N
+  if (node->type == COMPLEX_BRACKET)
+    {
+      free_charset (node->opr.mbcset);
+      return;
+    }
+#endif /* RE_ENABLE_I18N */
+
+  if (node->type == SIMPLE_BRACKET)
+    re_free (node->opr.sbcset);
+}
+
+/* Worker function for tree walking.  Releases the memory owned by NODE's
+   token via free_token.  EXTRA is unused.  Always returns REG_NOERROR.  */
+
+static reg_errcode_t
+free_tree (void *extra, bin_tree_t *node)
+{
+  re_token_t *tok = &node->token;
+
+  free_token (tok);
+  return REG_NOERROR;
+}
+
+
+/* Duplicate the tree rooted at ROOT using nodes from DFA's storage and
+   return the root of the copy, or NULL if a node cannot be created.
+   Every copied token is flagged as duplicated.  The walk is an explicit
+   preorder traversal that keeps a cursor in the source tree and a
+   matching cursor in the copy, so no recursion is needed.  */
+
+static bin_tree_t *
+duplicate_tree (const bin_tree_t *root, re_dfa_t *dfa)
+{
+  const bin_tree_t *src = root;
+  bin_tree_t *copy_root;
+  /* The copy of ROOT gets the same parent pointer as ROOT itself.  */
+  bin_tree_t *copy_parent = root->parent;
+  /* Where the next copied node has to be stored.  */
+  bin_tree_t **slot = &copy_root;
+
+  while (1)
+    {
+      const bin_tree_t *came_from;
+      bin_tree_t *copy;
+
+      /* Clone the current source node and hook it under its parent.  */
+      copy = create_token_tree (dfa, NULL, NULL, &src->token);
+      *slot = copy;
+      if (copy == NULL)
+	return NULL;
+      copy->parent = copy_parent;
+      copy->token.duplicated = 1;
+      copy_parent = copy;
+
+      /* Descend to the left whenever possible.  */
+      if (src->left != NULL)
+	{
+	  src = src->left;
+	  slot = &copy_parent->left;
+	  continue;
+	}
+
+      /* Otherwise climb, in both trees at once, until a node is found
+	 whose right subtree has not been visited yet.  */
+      came_from = NULL;
+      while (src->right == NULL || src->right == came_from)
+	{
+	  came_from = src;
+	  src = src->parent;
+	  copy_parent = copy_parent->parent;
+	  if (src == NULL)
+	    return copy_root;
+	}
+      src = src->right;
+      slot = &copy_parent->right;
+    }
+}
+
+/******************************************************************************/
+/******************************************************************************/
+/******************************************************************************/
+/* GKINCLUDE #include "regexec.c" */
+/******************************************************************************/
+/******************************************************************************/
+/******************************************************************************/
+static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
+				     int n) internal_function;
+static void match_ctx_clean (re_match_context_t *mctx) internal_function;
+static void match_ctx_free (re_match_context_t *cache) internal_function;
+static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
+					  int str_idx, int from, int to)
+     internal_function;
+static int search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx)
+     internal_function;
+static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node,
+					   int str_idx) internal_function;
+static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
+						   int node, int str_idx)
+     internal_function;
+static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
+			   re_dfastate_t **limited_sts, int last_node,
+			   int last_str_idx)
+     internal_function;
+static reg_errcode_t re_search_internal (const regex_t *preg,
+					 const char *string, int length,
+					 int start, int range, int stop,
+					 size_t nmatch, regmatch_t pmatch[],
+					 int eflags) internal_function;
+static int re_search_2_stub (struct re_pattern_buffer *bufp,
+			     const char *string1, int length1,
+			     const char *string2, int length2,
+			     int start, int range, struct re_registers *regs,
+			     int stop, int ret_len) internal_function;
+static int re_search_stub (struct re_pattern_buffer *bufp,
+			   const char *string, int length, int start,
+			   int range, int stop, struct re_registers *regs,
+			   int ret_len) internal_function;
+static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
+			      int nregs, int regs_allocated) internal_function;
+static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
+     internal_function;
+static int check_matching (re_match_context_t *mctx, int fl_longest_match,
+			   int *p_match_first) internal_function;
+static int check_halt_state_context (const re_match_context_t *mctx,
+				     const re_dfastate_t *state, int idx)
+     internal_function;
+static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
+			 regmatch_t *prev_idx_match, int cur_node,
+			 int cur_idx, int nmatch) internal_function;
+static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
+				      int str_idx, int dest_node, int nregs,
+				      regmatch_t *regs,
+				      re_node_set *eps_via_nodes)
+     internal_function;
+static reg_errcode_t set_regs (const regex_t *preg,
+			       const re_match_context_t *mctx,
+			       size_t nmatch, regmatch_t *pmatch,
+			       int fl_backtrack) internal_function;
+static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs)
+     internal_function;
+
+#ifdef RE_ENABLE_I18N
+static int sift_states_iter_mb (const re_match_context_t *mctx,
+				re_sift_context_t *sctx,
+				int node_idx, int str_idx, int max_str_idx)
+     internal_function;
+#endif /* RE_ENABLE_I18N */
+static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
+					   re_sift_context_t *sctx)
+     internal_function;
+static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
+					  re_sift_context_t *sctx, int str_idx,
+					  re_node_set *cur_dest)
+     internal_function;
+static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
+					      re_sift_context_t *sctx,
+					      int str_idx,
+					      re_node_set *dest_nodes)
+     internal_function;
+static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
+					    re_node_set *dest_nodes,
+					    const re_node_set *candidates)
+     internal_function;
+static int check_dst_limits (const re_match_context_t *mctx,
+			     re_node_set *limits,
+			     int dst_node, int dst_idx, int src_node,
+			     int src_idx) internal_function;
+static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
+					int boundaries, int subexp_idx,
+					int from_node, int bkref_idx)
+     internal_function;
+static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
+				      int limit, int subexp_idx,
+				      int node, int str_idx,
+				      int bkref_idx) internal_function;
+static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
+					  re_node_set *dest_nodes,
+					  const re_node_set *candidates,
+					  re_node_set *limits,
+					  struct re_backref_cache_entry *bkref_ents,
+					  int str_idx) internal_function;
+static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
+					re_sift_context_t *sctx,
+					int str_idx, const re_node_set *candidates)
+     internal_function;
+static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
+					re_dfastate_t **dst,
+					re_dfastate_t **src, int num)
+     internal_function;
+static re_dfastate_t *find_recover_state (reg_errcode_t *err,
+					 re_match_context_t *mctx) internal_function;
+static re_dfastate_t *transit_state (reg_errcode_t *err,
+				     re_match_context_t *mctx,
+				     re_dfastate_t *state) internal_function;
+static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
+					    re_match_context_t *mctx,
+					    re_dfastate_t *next_state)
+     internal_function;
+static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
+						re_node_set *cur_nodes,
+						int str_idx) internal_function;
+#if 0
+static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
+					re_match_context_t *mctx,
+					re_dfastate_t *pstate)
+     internal_function;
+#endif
+#ifdef RE_ENABLE_I18N
+static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
+				       re_dfastate_t *pstate)
+     internal_function;
+#endif /* RE_ENABLE_I18N */
+static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
+					  const re_node_set *nodes)
+     internal_function;
+static reg_errcode_t get_subexp (re_match_context_t *mctx,
+				 int bkref_node, int bkref_str_idx)
+     internal_function;
+static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
+				     const re_sub_match_top_t *sub_top,
+				     re_sub_match_last_t *sub_last,
+				     int bkref_node, int bkref_str)
+     internal_function;
+static int find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
+			     int subexp_idx, int type) internal_function;
+static reg_errcode_t check_arrival (re_match_context_t *mctx,
+				    state_array_t *path, int top_node,
+				    int top_str, int last_node, int last_str,
+				    int type) internal_function;
+static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
+						   int str_idx,
+						   re_node_set *cur_nodes,
+						   re_node_set *next_nodes)
+     internal_function;
+static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
+					       re_node_set *cur_nodes,
+					       int ex_subexp, int type)
+     internal_function;
+static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
+						   re_node_set *dst_nodes,
+						   int target, int ex_subexp,
+						   int type) internal_function;
+static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
+					 re_node_set *cur_nodes, int cur_str,
+					 int subexp_num, int type)
+     internal_function;
+static int build_trtable (const re_dfa_t *dfa,
+			  re_dfastate_t *state) internal_function;
+#ifdef RE_ENABLE_I18N
+static int check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
+				    const re_string_t *input, int idx)
+     internal_function;
+# ifdef _LIBC
+static unsigned int find_collation_sequence_value (const unsigned char *mbs,
+						   size_t name_len)
+     internal_function;
+# endif /* _LIBC */
+#endif /* RE_ENABLE_I18N */
+static int group_nodes_into_DFAstates (const re_dfa_t *dfa,
+				       const re_dfastate_t *state,
+				       re_node_set *states_node,
+				       bitset_t *states_ch) internal_function;
+static int check_node_accept (const re_match_context_t *mctx,
+			      const re_token_t *node, int idx)
+     internal_function;
+static reg_errcode_t extend_buffers (re_match_context_t *mctx)
+     internal_function;
+
+/* Entry point for POSIX code.  */
+
+/* regexec searches for a given pattern, specified by PREG, in the
+   string STRING.
+
+   If NMATCH is zero or REG_NOSUB was set in the cflags argument to
+   `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
+   least NMATCH elements, and we set them to the offsets of the
+   corresponding matched substrings.
+
+   EFLAGS specifies `execution flags' which affect matching: if
+   REG_NOTBOL is set, then ^ does not match at the beginning of the
+   string; if REG_NOTEOL is set, then $ does not match at the end.
+
+   We return 0 if we find a match and REG_NOMATCH if not.  */
+
+int
+regexec (const regex_t *__restrict preg, const char *__restrict string,
+	 size_t nmatch, regmatch_t pmatch[], int eflags)
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  reg_errcode_t status;
+  int first, last;
+
+  /* Any flag other than these three is rejected.  */
+  if ((eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND)) != 0)
+    return REG_BADPAT;
+
+  if ((eflags & REG_STARTEND) == 0)
+    {
+      first = 0;
+      last = strlen (string);
+    }
+  else
+    {
+      /* With REG_STARTEND the search window comes from pmatch[0].  */
+      first = pmatch[0].rm_so;
+      last = pmatch[0].rm_eo;
+    }
+
+  __libc_lock_lock (dfa->lock);
+  /* When the pattern was compiled without subexpression reporting, no
+     offsets are requested from the matcher.  */
+  if (preg->no_sub)
+    {
+      nmatch = 0;
+      pmatch = NULL;
+    }
+  status = re_search_internal (preg, string, last, first, last - first,
+			       last, nmatch, pmatch, eflags);
+  __libc_lock_unlock (dfa->lock);
+
+  /* Every kind of failure is reported as the same nonzero value.  */
+  return status != REG_NOERROR;
+}
+
+#ifdef _LIBC
+# include <shlib-compat.h>
+versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
+
+# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
+__typeof__ (__regexec) __compat_regexec;
+
+/* Compatibility entry point: forwards to regexec with every execution
+   flag other than REG_NOTBOL and REG_NOTEOL masked off.  */
+int
+attribute_compat_text_section
+__compat_regexec (const regex_t *__restrict preg,
+		  const char *__restrict string, size_t nmatch,
+		  regmatch_t pmatch[], int eflags)
+{
+  const int kept_flags = eflags & (REG_NOTBOL | REG_NOTEOL);
+
+  return regexec (preg, string, nmatch, pmatch, kept_flags);
+}
+compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
+# endif
+#endif
+
+/* Entry points for GNU code.  */
+
+/* re_match, re_search, re_match_2, re_search_2
+
+   The former two functions operate on STRING with length LENGTH,
+   while the latter two operate on the concatenation of STRING1 and STRING2
+   with lengths LENGTH1 and LENGTH2, respectively.
+
+   re_match() matches the compiled pattern in BUFP against the string,
+   starting at index START.
+
+   re_search() first tries matching at index START, then it tries to match
+   starting from index START + 1, and so on.  The last start position tried
+   is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
+   way as re_match().)
+
+   The parameter STOP of re_{match,search}_2 specifies that no match extending
+   beyond the first STOP characters of the concatenation of the strings
+   should be considered.
+
+   If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
+   and all groups are stored in REGS.  (For the "_2" variants, the offsets are
+   computed relative to the concatenation, not relative to the individual
+   strings.)
+
+   On success, re_match* functions return the length of the match, re_search*
+   return the position of the start of the match.  Return value -1 means no
+   match was found and -2 indicates an internal error.  */
+
+int
+re_match (struct re_pattern_buffer *bufp, const char *string, int length,
+	  int start, struct re_registers *regs)
+{
+  /* Matching is a search with RANGE 0 that reports the match length;
+     LENGTH doubles as the STOP limit.  */
+  const int range = 0;
+  const int want_length = 1;
+
+  return re_search_stub (bufp, string, length, start, range, length, regs,
+			 want_length);
+}
+#ifdef _LIBC
+weak_alias (__re_match, re_match)
+#endif
+
+int
+re_search (struct re_pattern_buffer *bufp, const char *string, int length,
+	   int start, int range, struct re_registers *regs)
+{
+  /* Report the match position rather than its length; LENGTH doubles as
+     the STOP limit.  */
+  const int want_length = 0;
+
+  return re_search_stub (bufp, string, length, start, range, length, regs,
+			 want_length);
+}
+#ifdef _LIBC
+weak_alias (__re_search, re_search)
+#endif
+
+int
+re_match_2 (struct re_pattern_buffer *bufp, const char *string1,
+	    int length1, const char *string2, int length2, int start,
+	    struct re_registers *regs, int stop)
+{
+  /* Matching is a search with RANGE 0 that reports the match length.  */
+  const int range = 0;
+  const int want_length = 1;
+
+  return re_search_2_stub (bufp, string1, length1, string2, length2,
+			   start, range, regs, stop, want_length);
+}
+#ifdef _LIBC
+weak_alias (__re_match_2, re_match_2)
+#endif
+
+int
+re_search_2 (struct re_pattern_buffer *bufp, const char *string1,
+	     int length1, const char *string2, int length2, int start,
+	     int range, struct re_registers *regs, int stop)
+{
+  /* Report the match position rather than its length.  */
+  const int want_length = 0;
+
+  return re_search_2_stub (bufp, string1, length1, string2, length2,
+			   start, range, regs, stop, want_length);
+}
+#ifdef _LIBC
+weak_alias (__re_search_2, re_search_2)
+#endif
+
+/* Common worker for re_match_2 and re_search_2.  Presents STRING1
+   (LENGTH1 bytes) followed by STRING2 (LENGTH2 bytes) to re_search_stub
+   as one string; a temporary concatenation is built only when both parts
+   are non-empty.  START, RANGE, REGS, STOP and RET_LEN are passed through
+   unchanged.  Returns whatever re_search_stub returns, or -2 if a length
+   or STOP is negative, if the combined length does not fit in an int, or
+   if the temporary buffer cannot be allocated.  */
+
+static int
+re_search_2_stub (struct re_pattern_buffer *bufp, const char *string1,
+		  int length1, const char *string2, int length2, int start,
+		  int range, struct re_registers *regs, int stop, int ret_len)
+{
+  const char *str;
+  char *joined = NULL;
+  unsigned int total;
+  int len, rval;
+
+  if (BE (length1 < 0 || length2 < 0 || stop < 0, 0))
+    return -2;
+
+  /* Both lengths are known to be non-negative here, so their unsigned sum
+     cannot wrap around.  It is representable as an int exactly when it
+     does not exceed the largest int value, spelled (~0u >> 1) to avoid
+     relying on any header.  Forming the sum in int could overflow.  */
+  total = (unsigned int) length1 + (unsigned int) length2;
+  if (BE (total > (~0u >> 1), 0))
+    return -2;
+  len = (int) total;
+
+  /* Concatenate the strings.  */
+  if (length2 > 0)
+    {
+      if (length1 > 0)
+	{
+	  joined = re_malloc (char, len);
+	  if (BE (joined == NULL, 0))
+	    return -2;
+#ifdef _LIBC
+	  memcpy (__mempcpy (joined, string1, length1), string2, length2);
+#else
+	  memcpy (joined, string1, length1);
+	  memcpy (joined + length1, string2, length2);
+#endif
+	  str = joined;
+	}
+      else
+	str = string2;
+    }
+  else
+    str = string1;
+
+  rval = re_search_stub (bufp, str, len, start, range, stop, regs,
+			 ret_len);
+  /* JOINED is NULL unless a temporary copy was made; freeing NULL is a
+     no-op.  */
+  re_free (joined);
+  return rval;
+}
+
+/* The parameters have the same meaning as those of re_search.
+   Additional parameters:
+   STOP is handed to re_search_internal unchanged.
+   If RET_LEN is nonzero the length of the match is returned (re_match style);
+   otherwise the position of the match is returned.
+   Returns -1 when START is out of range or nothing matched, and -2 on an
+   internal error (the matcher failed for a reason other than "no match",
+   or memory for the registers could not be obtained).  */
+
+static int
+re_search_stub (struct re_pattern_buffer *bufp, const char *string,
+		int length, int start, int range, int stop,
+		struct re_registers *regs, int ret_len)
+{
+  reg_errcode_t result;
+  regmatch_t *pmatch;
+  int nregs, rval;
+  int eflags = 0;
+  re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
+
+  /* Check for out-of-range.  */
+  if (BE (start < 0 || start > length, 0))
+    return -1;
+  /* Clamp RANGE so that START + RANGE stays within [0, LENGTH].  No sum
+     is formed: 0 <= START <= LENGTH holds here, so neither LENGTH - START
+     nor -START can overflow, whereas START + RANGE could for a huge
+     RANGE.  */
+  if (BE (range > length - start, 0))
+    range = length - start;
+  else if (BE (range < -start, 0))
+    range = -start;
+
+  __libc_lock_lock (dfa->lock);
+
+  eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
+  eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
+
+  /* Compile fastmap if we haven't yet.  */
+  if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate)
+    re_compile_fastmap (bufp);
+
+  if (BE (bufp->no_sub, 0))
+    regs = NULL;
+
+  /* We need at least 1 register.  */
+  if (regs == NULL)
+    nregs = 1;
+  else if (BE (bufp->regs_allocated == REGS_FIXED &&
+	       regs->num_regs < bufp->re_nsub + 1, 0))
+    {
+      /* The caller's fixed arrays are smaller than the pattern needs:
+	 report only as many registers as fit.  */
+      nregs = regs->num_regs;
+      if (BE (nregs < 1, 0))
+	{
+	  /* Nothing can be copied to regs.  */
+	  regs = NULL;
+	  nregs = 1;
+	}
+    }
+  else
+    nregs = bufp->re_nsub + 1;
+  pmatch = re_malloc (regmatch_t, nregs);
+  if (BE (pmatch == NULL, 0))
+    {
+      rval = -2;
+      goto out;
+    }
+
+  result = re_search_internal (bufp, string, length, start, range, stop,
+			       nregs, pmatch, eflags);
+
+  rval = 0;
+
+  /* I hope we needn't fill their regs with -1's when no match was found.  */
+  if (result != REG_NOERROR)
+    /* Only a genuine "no match" is -1; any other failure is an internal
+       error and is reported as -2.  */
+    rval = (result == REG_NOMATCH) ? -1 : -2;
+  else if (regs != NULL)
+    {
+      /* If caller wants register contents data back, copy them.  */
+      bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
+					   bufp->regs_allocated);
+      if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
+	rval = -2;
+    }
+
+  if (BE (rval == 0, 1))
+    {
+      if (ret_len)
+	{
+	  assert (pmatch[0].rm_so == start);
+	  rval = pmatch[0].rm_eo - start;
+	}
+      else
+	rval = pmatch[0].rm_so;
+    }
+  re_free (pmatch);
+ out:
+  __libc_lock_unlock (dfa->lock);
+  return rval;
+}
+
+/* Copy the NREGS match offsets in PMATCH into REGS, allocating or growing
+   REGS' arrays as dictated by REGS_ALLOCATED (REGS_UNALLOCATED,
+   REGS_REALLOCATE or REGS_FIXED).  Entries of REGS from index NREGS up to
+   its num_regs are set to -1.
+   Returns the allocation state the caller should record: REGS_REALLOCATE
+   or REGS_FIXED on success, REGS_UNALLOCATED when memory is exhausted.
+   On failure REGS never holds a pointer that has already been freed.  */
+
+static unsigned
+re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, int nregs,
+	      int regs_allocated)
+{
+  int rval = REGS_REALLOCATE;
+  int i;
+  /* We need one extra element beyond `num_regs' for the `-1' marker GNU code
+     uses.  */
+  int need_regs = nregs + 1;
+
+  /* Have the register data arrays been allocated?  */
+  if (regs_allocated == REGS_UNALLOCATED)
+    { /* No.  So allocate them with malloc.  */
+      regoff_t *new_start = re_malloc (regoff_t, need_regs);
+      regoff_t *new_end = re_malloc (regoff_t, need_regs);
+      if (BE (new_start == NULL, 0) || BE (new_end == NULL, 0))
+	{
+	  /* Release whichever array was obtained so that it does not
+	     leak; REGS itself is left untouched.  */
+	  re_free (new_start);
+	  re_free (new_end);
+	  return REGS_UNALLOCATED;
+	}
+      regs->start = new_start;
+      regs->end = new_end;
+      regs->num_regs = need_regs;
+    }
+  else if (regs_allocated == REGS_REALLOCATE)
+    { /* Yes.  If we need more elements than were already
+	 allocated, reallocate them.  If we need fewer, just
+	 leave it alone.  */
+      if (BE (need_regs > regs->num_regs, 0))
+	{
+	  regoff_t *new_start;
+	  regoff_t *new_end;
+
+	  new_start = re_realloc (regs->start, regoff_t, need_regs);
+	  if (BE (new_start == NULL, 0))
+	    return REGS_UNALLOCATED;
+	  /* A successful realloc may have released the old block, so
+	     publish the new pointer before anything else can fail.  */
+	  regs->start = new_start;
+
+	  new_end = re_realloc (regs->end, regoff_t, need_regs);
+	  if (BE (new_end == NULL, 0))
+	    return REGS_UNALLOCATED;
+	  regs->end = new_end;
+	  regs->num_regs = need_regs;
+	}
+    }
+  else
+    {
+      assert (regs_allocated == REGS_FIXED);
+      /* This function may not be called with REGS_FIXED and nregs too big.  */
+      assert (regs->num_regs >= nregs);
+      rval = REGS_FIXED;
+    }
+
+  /* Copy the regs.  */
+  for (i = 0; i < nregs; ++i)
+    {
+      regs->start[i] = pmatch[i].rm_so;
+      regs->end[i] = pmatch[i].rm_eo;
+    }
+  /* Mark every remaining slot as unused.  */
+  for ( ; i < regs->num_regs; ++i)
+    regs->start[i] = regs->end[i] = -1;
+
+  return rval;
+}
+
+/* Make REGS use the caller-supplied arrays STARTS and ENDS, each holding
+   NUM_REGS registers, and record in BUFP that later matches may
+   reallocate them.  STARTS and ENDS must come from the malloc library
+   routine and each be at least NUM_REGS * sizeof (regoff_t) bytes long.
+
+   If NUM_REGS == 0, REGS is emptied and BUFP is marked so that subsequent
+   matches allocate their own register data.
+
+   Unless this function is called, the first search or match using
+   PATTERN_BUFFER will allocate its own register data, without
+   freeing the old data.  */
+
+void
+re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs,
+		  unsigned num_regs, regoff_t *starts, regoff_t *ends)
+{
+  if (num_regs == 0)
+    {
+      /* No storage supplied: forget any arrays REGS referred to.  */
+      bufp->regs_allocated = REGS_UNALLOCATED;
+      regs->num_regs = 0;
+      regs->end = (regoff_t *) 0;
+      regs->start = (regoff_t *) 0;
+      return;
+    }
+
+  bufp->regs_allocated = REGS_REALLOCATE;
+  regs->num_regs = num_regs;
+  regs->start = starts;
+  regs->end = ends;
+}
+#ifdef _LIBC
+weak_alias (__re_set_registers, re_set_registers)
+#endif
+
+/* Entry points compatible with 4.2 BSD regex library.  We don't define
+   them unless specifically requested.  */
+
+#if defined _REGEX_RE_COMP || defined _LIBC
+/* Match S against the pattern held in re_comp_buf.  Returns nonzero
+   exactly when regexec reports success (returns 0).  */
+int
+# ifdef _LIBC
+weak_function
+# endif
+re_exec (const char *s)
+{
+  return regexec (&re_comp_buf, s, 0, NULL, 0) == 0;
+}
+#endif /* _REGEX_RE_COMP */
+
+/* Internal entry point.  */
+
+/* Searches for a compiled pattern PREG in the string STRING, whose
+   length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
+   meanings as in regexec.  START and RANGE have the same meanings
+   as in re_search.
+   Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
+   otherwise return the error code.
+   Note: We assume front end functions already check ranges.
+   (START + RANGE >= 0 && START + RANGE <= LENGTH)  */
+
+/* Core search routine behind regexec and the re_search/re_match stubs.
+   Tries each candidate start position from START towards START + RANGE
+   (backwards when RANGE is negative), using the fastmap where available
+   to skip positions that cannot begin a match, and runs check_matching
+   at each remaining candidate.  On success the first NMATCH entries of
+   PMATCH receive offsets relative to STRING.  STOP is stored as the
+   input's stop position.  */
+static reg_errcode_t
+re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
+		    eflags)
+    const regex_t *preg;
+    const char *string;
+    int length, start, range, stop, eflags;
+    size_t nmatch;
+    regmatch_t pmatch[];
+{
+  reg_errcode_t err;
+  const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
+  int left_lim, right_lim, incr;
+  int fl_longest_match, match_first, match_kind, match_last = -1;
+  int extra_nmatch;
+  int sb, ch;
+#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
+  re_match_context_t mctx = { .dfa = dfa };
+#else
+  re_match_context_t mctx;
+#endif
+  /* The fastmap is only consulted when it exists, is accurate, more than
+     one start position is allowed, and the pattern cannot match the
+     empty string.  */
+  char *fastmap = (preg->fastmap != NULL && preg->fastmap_accurate
+		   && range && !preg->can_be_null) ? preg->fastmap : NULL;
+  RE_TRANSLATE_TYPE t = preg->translate;
+
+#if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
+  /* Without designated initializers, clear the context by hand.  */
+  memset (&mctx, '\0', sizeof (re_match_context_t));
+  mctx.dfa = dfa;
+#endif
+
+  /* Entries of PMATCH beyond the pattern's re_nsub + 1 registers are
+     counted separately; they are only filled with -1 at the end.  */
+  extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
+  nmatch -= extra_nmatch;
+
+  /* Check whether the DFA has not been compiled.  */
+  if (BE (preg->used == 0 || dfa->init_state == NULL
+	  || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
+	  || dfa->init_state_begbuf == NULL, 0))
+    return REG_NOMATCH;
+
+#ifdef DEBUG
+  /* We assume front-end functions already check them.  */
+  assert (start + range >= 0 && start + range <= length);
+#endif
+
+  /* If initial states with non-begbuf contexts have no elements,
+     the regex must be anchored.  If preg->newline_anchor is set,
+     we'll never use init_state_nl, so do not check it.  */
+  if (dfa->init_state->nodes.nelem == 0
+      && dfa->init_state_word->nodes.nelem == 0
+      && (dfa->init_state_nl->nodes.nelem == 0
+	  || !preg->newline_anchor))
+    {
+      /* Position 0 must be one end of the search interval.  */
+      if (start != 0 && start + range != 0)
+        return REG_NOMATCH;
+      start = range = 0;
+    }
+
+  /* We must check the longest matching, if nmatch > 0.  */
+  fl_longest_match = (nmatch != 0 || dfa->nbackref);
+
+  err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
+			    preg->translate, preg->syntax & RE_ICASE, dfa);
+  if (BE (err != REG_NOERROR, 0))
+    goto free_return;
+  mctx.input.stop = stop;
+  mctx.input.raw_stop = stop;
+  mctx.input.newline_anchor = preg->newline_anchor;
+
+  err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
+  if (BE (err != REG_NOERROR, 0))
+    goto free_return;
+
+  /* We will log all the DFA states through which the dfa pass,
+     if nmatch > 1, or this dfa has "multibyte node", which is a
+     back-reference or a node which can accept multibyte character or
+     multi character collating element.  */
+  if (nmatch > 1 || dfa->has_mb_node)
+    {
+      mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
+      if (BE (mctx.state_log == NULL, 0))
+	{
+	  err = REG_ESPACE;
+	  goto free_return;
+	}
+    }
+  else
+    mctx.state_log = NULL;
+
+  match_first = start;
+  mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
+			   : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
+
+  /* Check incrementally whether or not the input string matches.  */
+  incr = (range < 0) ? -1 : 1;
+  left_lim = (range < 0) ? start + range : start;
+  right_lim = (range < 0) ? start : start + range;
+  sb = dfa->mb_cur_max == 1;
+  /* Encode the scanning strategy used by the switch below: 8 means no
+     fastmap.  Otherwise bit value 4 is set when the raw bytes of STRING
+     can be looked up directly (single-byte case, or neither RE_ICASE nor
+     a translate table), 2 when searching forward, and 1 when a translate
+     table is present.  */
+  match_kind =
+    (fastmap
+     ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
+	| (range >= 0 ? 2 : 0)
+	| (t != NULL ? 1 : 0))
+     : 8);
+
+  for (;; match_first += incr)
+    {
+      /* Until a match is confirmed, leaving the loop means "no match".  */
+      err = REG_NOMATCH;
+      if (match_first < left_lim || right_lim < match_first)
+	goto free_return;
+
+      /* Advance as rapidly as possible through the string, until we
+	 find a plausible place to start matching.  This may be done
+	 with varying efficiency, so there are various possibilities:
+	 only the most common of them are specialized, in order to
+	 save on code size.  We use a switch statement for speed.  */
+      switch (match_kind)
+	{
+	case 8:
+	  /* No fastmap.  */
+	  break;
+
+	case 7:
+	  /* Fastmap with single-byte translation, match forward.  */
+	  while (BE (match_first < right_lim, 1)
+		 && !fastmap[t[(unsigned char) string[match_first]]])
+	    ++match_first;
+	  goto forward_match_found_start_or_reached_end;
+
+	case 6:
+	  /* Fastmap without translation, match forward.  */
+	  while (BE (match_first < right_lim, 1)
+		 && !fastmap[(unsigned char) string[match_first]])
+	    ++match_first;
+
+	forward_match_found_start_or_reached_end:
+	  /* The loops above stop short of RIGHT_LIM, so the last position
+	     is tested here; a position at or past LENGTH counts as a NUL
+	     byte.  */
+	  if (BE (match_first == right_lim, 0))
+	    {
+	      ch = match_first >= length
+		       ? 0 : (unsigned char) string[match_first];
+	      if (!fastmap[t ? t[ch] : ch])
+		goto free_return;
+	    }
+	  break;
+
+	case 4:
+	case 5:
+	  /* Fastmap without multi-byte translation, match backwards.  */
+	  while (match_first >= left_lim)
+	    {
+	      ch = match_first >= length
+		       ? 0 : (unsigned char) string[match_first];
+	      if (fastmap[t ? t[ch] : ch])
+		break;
+	      --match_first;
+	    }
+	  if (match_first < left_lim)
+	    goto free_return;
+	  break;
+
+	default:
+	  /* In this case, we can't determine easily the current byte,
+	     since it might be a component byte of a multibyte
+	     character.  Then we use the constructed buffer instead.  */
+	  for (;;)
+	    {
+	      /* If MATCH_FIRST is out of the valid range, reconstruct the
+		 buffers.  */
+	      unsigned int offset = match_first - mctx.input.raw_mbs_idx;
+	      if (BE (offset >= (unsigned int) mctx.input.valid_raw_len, 0))
+		{
+		  err = re_string_reconstruct (&mctx.input, match_first,
+					       eflags);
+		  if (BE (err != REG_NOERROR, 0))
+		    goto free_return;
+
+		  offset = match_first - mctx.input.raw_mbs_idx;
+		}
+	      /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
+		 Note that MATCH_FIRST must not be smaller than 0.  */
+	      ch = (match_first >= length
+		    ? 0 : re_string_byte_at (&mctx.input, offset));
+	      if (fastmap[ch])
+		break;
+	      match_first += incr;
+	      if (match_first < left_lim || match_first > right_lim)
+	        {
+	          err = REG_NOMATCH;
+	          goto free_return;
+	        }
+	    }
+	  break;
+	}
+
+      /* Reconstruct the buffers so that the matcher can assume that
+	 the matching starts from the beginning of the buffer.  */
+      err = re_string_reconstruct (&mctx.input, match_first, eflags);
+      if (BE (err != REG_NOERROR, 0))
+	goto free_return;
+
+#ifdef RE_ENABLE_I18N
+     /* Don't consider this char as a possible match start if it is part,
+	yet isn't the head, of a multibyte character.  */
+      if (!sb && !re_string_first_byte (&mctx.input, 0))
+	continue;
+#endif
+
+      /* It seems to be an appropriate one, so use the matcher.  */
+      /* We assume that the matching starts from 0.  */
+      mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
+      match_last = check_matching (&mctx, fl_longest_match,
+				   range >= 0 ? &match_first : NULL);
+      /* check_matching's result is treated as: -1 no match here, -2 an
+	 error (reported as REG_ESPACE), anything else the match end.  */
+      if (match_last != -1)
+	{
+	  if (BE (match_last == -2, 0))
+	    {
+	      err = REG_ESPACE;
+	      goto free_return;
+	    }
+	  else
+	    {
+	      mctx.match_last = match_last;
+	      if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
+		{
+		  re_dfastate_t *pstate = mctx.state_log[match_last];
+		  mctx.last_node = check_halt_state_context (&mctx, pstate,
+							     match_last);
+		}
+	      if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
+		  || dfa->nbackref)
+		{
+		  err = prune_impossible_nodes (&mctx);
+		  if (err == REG_NOERROR)
+		    break;
+		  if (BE (err != REG_NOMATCH, 0))
+		    goto free_return;
+		  /* Pruning rejected this candidate; try the next start.  */
+		  match_last = -1;
+		}
+	      else
+		break; /* We found a match.  */
+	    }
+	}
+
+      match_ctx_clean (&mctx);
+    }
+
+#ifdef DEBUG
+  assert (match_last != -1);
+  assert (err == REG_NOERROR);
+#endif
+
+  /* Set pmatch[] if we need.  */
+  if (nmatch > 0)
+    {
+      int reg_idx;
+
+      /* Initialize registers.  */
+      for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
+	pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
+
+      /* Set the points where matching start/end.  These are still
+	 relative to MATCH_FIRST; the offset is added back below.  */
+      pmatch[0].rm_so = 0;
+      pmatch[0].rm_eo = mctx.match_last;
+
+      if (!preg->no_sub && nmatch > 1)
+	{
+	  err = set_regs (preg, &mctx, nmatch, pmatch,
+			  dfa->has_plural_match && dfa->nbackref > 0);
+	  if (BE (err != REG_NOERROR, 0))
+	    goto free_return;
+	}
+
+      /* At last, add the offset to each of the registers, since we slid
+	 the buffers so that we could assume that the matching starts
+	 from 0.  */
+      for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
+	if (pmatch[reg_idx].rm_so != -1)
+	  {
+#ifdef RE_ENABLE_I18N
+	    /* When an offsets table is in use, map buffer indices back to
+	       raw string indices first.  */
+	    if (BE (mctx.input.offsets_needed != 0, 0))
+	      {
+		pmatch[reg_idx].rm_so =
+		  (pmatch[reg_idx].rm_so == mctx.input.valid_len
+		   ? mctx.input.valid_raw_len
+		   : mctx.input.offsets[pmatch[reg_idx].rm_so]);
+		pmatch[reg_idx].rm_eo =
+		  (pmatch[reg_idx].rm_eo == mctx.input.valid_len
+		   ? mctx.input.valid_raw_len
+		   : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
+	      }
+#else
+	    assert (mctx.input.offsets_needed == 0);
+#endif
+	    pmatch[reg_idx].rm_so += match_first;
+	    pmatch[reg_idx].rm_eo += match_first;
+	  }
+      /* Registers the pattern does not have are reported as unset.  */
+      for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
+	{
+	  pmatch[nmatch + reg_idx].rm_so = -1;
+	  pmatch[nmatch + reg_idx].rm_eo = -1;
+	}
+
+      /* Where subexp_map redirects a subexpression to another one, copy
+	 the offsets of the one it maps to.  */
+      if (dfa->subexp_map)
+        for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
+          if (dfa->subexp_map[reg_idx] != reg_idx)
+            {
+              pmatch[reg_idx + 1].rm_so
+                = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
+              pmatch[reg_idx + 1].rm_eo
+                = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
+            }
+    }
+
+  /* Common exit: ERR holds the result; release everything acquired.  */
+ free_return:
+  re_free (mctx.state_log);
+  if (dfa->nbackref)
+    match_ctx_free (&mctx);
+  re_string_destruct (&mctx.input);
+  return err;
+}
+
+/* Sift MCTX->state_log backward from the halt node at MCTX->match_last,
+   retrying from earlier halt states when back references are in use.  On
+   success the sifted log, halt node and match end are stored in MCTX.  */
+static reg_errcode_t
+prune_impossible_nodes (re_match_context_t *mctx)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  int halt_node, match_last;
+  reg_errcode_t ret;
+  re_dfastate_t **sifted_states, **lim_states = NULL;
+  re_sift_context_t sctx;
+#ifdef DEBUG
+  assert (mctx->state_log != NULL);
+#endif
+  match_last = mctx->match_last;
+  halt_node = mctx->last_node;
+  sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
+  if (BE (sifted_states == NULL, 0))
+    {
+      ret = REG_ESPACE;
+      goto free_return;
+    }
+  if (dfa->nbackref)
+    {
+      lim_states = re_malloc (re_dfastate_t *, match_last + 1);
+      if (BE (lim_states == NULL, 0))
+	{
+	  ret = REG_ESPACE;
+	  goto free_return;
+	}
+      while (1)
+	{
+	  memset (lim_states, '\0', sizeof (re_dfastate_t *) * (match_last + 1));
+	  sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
+			 match_last);
+	  ret = sift_states_backward (mctx, &sctx);
+	  re_node_set_free (&sctx.limits);
+	  if (BE (ret != REG_NOERROR, 0))
+	    goto free_return;
+	  if (sifted_states[0] != NULL || lim_states[0] != NULL)
+	    break;	/* The sifted log reaches index 0: done.  */
+	  do	/* Step back to the nearest earlier halt state.  */
+	    {
+	      --match_last;
+	      if (match_last < 0)
+		{
+		  ret = REG_NOMATCH;
+		  goto free_return;
+		}
+	    } while (mctx->state_log[match_last] == NULL
+		     || !mctx->state_log[match_last]->halt);
+	  halt_node = check_halt_state_context (mctx,
+						mctx->state_log[match_last],
+						match_last);
+	}
+      ret = merge_state_array (dfa, sifted_states, lim_states,
+			       match_last + 1);
+      re_free (lim_states);
+      lim_states = NULL;	/* Already freed; keep the exit path safe.  */
+      if (BE (ret != REG_NOERROR, 0))
+	goto free_return;
+    }
+  else
+    {
+      sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
+      ret = sift_states_backward (mctx, &sctx);
+      re_node_set_free (&sctx.limits);
+      if (BE (ret != REG_NOERROR, 0))
+	goto free_return;
+    }
+  re_free (mctx->state_log);
+  mctx->state_log = sifted_states;
+  sifted_states = NULL;	/* Now owned by MCTX; not freed below.  */
+  mctx->last_node = halt_node;
+  mctx->match_last = match_last;
+  ret = REG_NOERROR;
+ free_return:
+  re_free (sifted_states);
+  re_free (lim_states);
+  return ret;
+}
+
+/* Return the initial DFA state suited to the context preceding IDX.
+   The initial state may carry constraints such as "\<" or "^", so the
+   variant matching the surrounding context has to be selected.  */
+
+static inline re_dfastate_t *
+__attribute ((always_inline)) internal_function
+acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
+			    int idx)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  unsigned int context;
+
+  /* Without constraints one initial state serves every context.  */
+  if (!dfa->init_state->has_constraint)
+    return dfa->init_state;
+
+  context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
+  if (IS_WORD_CONTEXT (context))
+    return dfa->init_state_word;
+  if (IS_ORDINARY_CONTEXT (context))
+    return dfa->init_state;
+  if (IS_NEWLINE_CONTEXT (context))
+    {
+      if (IS_BEGBUF_CONTEXT (context))
+	return dfa->init_state_begbuf;
+      return dfa->init_state_nl;
+    }
+  if (IS_BEGBUF_CONTEXT (context))
+    /* A relatively rare case, so the state is calculated on demand.  */
+    return re_acquire_state_context (err, dfa,
+				     dfa->init_state->entrance_nodes,
+				     context);
+
+  /* Presumably unreachable; fall back to the plain initial state.  */
+  return dfa->init_state;
+}
+
+/* Check whether the regular expression matches the input string or not,
+   and return the index where the match ends, return -1 if there is no
+   match, or return -2 in case of an error.
+   FL_LONGEST_MATCH means we want the POSIX longest matching.
+   If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
+   next place where we may want to try matching.
+   Note that the matcher assumes that the matching starts from the current
+   index of the buffer.  */
+
+static int
+internal_function
+check_matching (re_match_context_t *mctx, int fl_longest_match,
+		int *p_match_first)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  reg_errcode_t err;
+  int match = 0;
+  int match_last = -1;
+  int cur_str_idx = re_string_cur_idx (&mctx->input);
+  re_dfastate_t *cur_state;
+  int at_init_state = p_match_first != NULL;
+  int next_start_idx = cur_str_idx;
+
+  err = REG_NOERROR;
+  cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
+  /* An initial state must not be NULL (invalid).  */
+  if (BE (cur_state == NULL, 0))
+    {
+      assert (err == REG_ESPACE);
+      return -2;
+    }
+
+  if (mctx->state_log != NULL)
+    {
+      mctx->state_log[cur_str_idx] = cur_state;
+
+      /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
+	 later.  E.g. Processing back references.  */
+      if (BE (dfa->nbackref, 0))
+	{
+	  at_init_state = 0;
+	  err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
+	  if (BE (err != REG_NOERROR, 0))
+	    return -2;	/* An error code must never look like an index.  */
+
+	  if (cur_state->has_backref)
+	    {
+	      err = transit_state_bkref (mctx, &cur_state->nodes);
+	      if (BE (err != REG_NOERROR, 0))
+	        return -2;	/* Likewise: report failure as -2.  */
+	    }
+	}
+    }
+
+  /* If the RE accepts NULL string.  */
+  if (BE (cur_state->halt, 0))
+    {
+      if (!cur_state->has_constraint
+	  || check_halt_state_context (mctx, cur_state, cur_str_idx))
+	{
+	  if (!fl_longest_match)
+	    return cur_str_idx;
+	  else
+	    {
+	      match_last = cur_str_idx;
+	      match = 1;
+	    }
+	}
+    }
+
+  while (!re_string_eoi (&mctx->input))
+    {
+      re_dfastate_t *old_state = cur_state;
+      int next_char_idx = re_string_cur_idx (&mctx->input) + 1;
+
+      if (BE (next_char_idx >= mctx->input.bufs_len, 0)
+          || (BE (next_char_idx >= mctx->input.valid_len, 0)
+              && mctx->input.valid_len < mctx->input.len))
+        {
+          err = extend_buffers (mctx);
+          if (BE (err != REG_NOERROR, 0))
+	    {
+	      assert (err == REG_ESPACE);
+	      return -2;
+	    }
+        }
+
+      cur_state = transit_state (&err, mctx, cur_state);
+      if (mctx->state_log != NULL)
+	cur_state = merge_state_with_log (&err, mctx, cur_state);
+
+      if (cur_state == NULL)
+	{
+	  /* Reached the invalid state or an error.  Try to recover a valid
+	     state using the state log, if available and if we have not
+	     already found a valid (even if not the longest) match.  */
+	  if (BE (err != REG_NOERROR, 0))
+	    return -2;
+
+	  if (mctx->state_log == NULL
+	      || (match && !fl_longest_match)
+	      || (cur_state = find_recover_state (&err, mctx)) == NULL)
+	    break;
+	}
+
+      if (BE (at_init_state, 0))
+	{
+	  if (old_state == cur_state)
+	    next_start_idx = next_char_idx;
+	  else
+	    at_init_state = 0;
+	}
+
+      if (cur_state->halt)
+	{
+	  /* Reached a halt state.
+	     Check the halt state can satisfy the current context.  */
+	  if (!cur_state->has_constraint
+	      || check_halt_state_context (mctx, cur_state,
+					   re_string_cur_idx (&mctx->input)))
+	    {
+	      /* We found an appropriate halt state.  */
+	      match_last = re_string_cur_idx (&mctx->input);
+	      match = 1;
+
+	      /* We found a match, do not modify match_first below.  */
+	      p_match_first = NULL;
+	      if (!fl_longest_match)
+		break;
+	    }
+	}
+    }
+
+  if (p_match_first)
+    *p_match_first += next_start_idx;
+
+  return match_last;
+}
+
+/* Return 1 when NODE is an END_OF_RE node whose constraint, if it has
+   one, is not violated by CONTEXT; return 0 otherwise.  */
+static int
+internal_function
+check_halt_node_context (const re_dfa_t *dfa, int node, unsigned int context)
+{
+  unsigned int cond;
+
+  /* Anything but an END_OF_RE node is rejected outright.  */
+  if (dfa->nodes[node].type != END_OF_RE)
+    return 0;
+
+  /* A node without constraint halts in any context.  */
+  cond = dfa->nodes[node].constraint;
+  return !cond || !(NOT_SATISFY_NEXT_CONSTRAINT (cond, context));
+}
+
+/* Look in the halt state STATE for a node that may end the match at
+   string index IDX.  Return the first such node of STATE, or 0 when no
+   node of STATE satisfies the context at IDX.  */
+static int
+internal_function
+check_halt_state_context (const re_match_context_t *mctx,
+			  const re_dfastate_t *state, int idx)
+{
+  const re_node_set *members = &state->nodes;
+  unsigned int context;
+  int pos;
+#ifdef DEBUG
+  assert (state->halt);
+#endif
+  context = re_string_context_at (&mctx->input, idx, mctx->eflags);
+  for (pos = 0; pos < members->nelem; ++pos)
+    if (check_halt_node_context (mctx->dfa, members->elems[pos], context))
+      return members->elems[pos];
+  return 0;
+}
+
+/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
+   corresponding to the DFA), updating *PIDX and EPS_VIA_NODES.
+   Return the destination node, -1 if there is no valid destination, or
+   -2 if memory is exhausted.  */
+static int
+internal_function
+proceed_next_node (const re_match_context_t *mctx, int nregs, regmatch_t *regs,
+		   int *pidx, int node, re_node_set *eps_via_nodes,
+		   struct re_fail_stack_t *fs)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  int i, err;
+  if (IS_EPSILON_NODE (dfa->nodes[node].type))
+    {
+      re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
+      re_node_set *edests = &dfa->edests[node];
+      int dest_node;
+      err = re_node_set_insert (eps_via_nodes, node);
+      if (BE (err < 0, 0))
+	return -2;
+      /* Pick up a valid destination, or return -1 if none is found.  */
+      for (dest_node = -1, i = 0; i < edests->nelem; ++i)
+	{
+	  int candidate = edests->elems[i];
+	  if (!re_node_set_contains (cur_nodes, candidate))
+	    continue;
+          if (dest_node == -1)
+	    dest_node = candidate;
+          else
+	    {
+	      /* In order to avoid infinite loop like "(a*)*", return the second
+	         epsilon-transition if the first was already considered.  */
+	      if (re_node_set_contains (eps_via_nodes, dest_node))
+	        return candidate;
+	      /* Otherwise, push the second epsilon-transition on the fail stack.  */
+	      else if (fs != NULL
+		       && push_fail_stack (fs, *pidx, candidate, nregs, regs,
+				           eps_via_nodes))
+		return -2;
+	      /* We know we are going to exit.  */
+	      break;
+	    }
+	}
+      return dest_node;
+    }
+  else
+    {
+      int naccepted = 0;
+#ifdef RE_ENABLE_I18N
+      if (dfa->nodes[node].accept_mb)
+	naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
+      else
+#endif /* RE_ENABLE_I18N */
+      if (dfa->nodes[node].type == OP_BACK_REF)
+	{
+	  int subexp_idx = dfa->nodes[node].opr.idx + 1;
+	  /* The register is usable only if it exists and is completely set.  */
+	  int reg_set = (subexp_idx < nregs && regs[subexp_idx].rm_so != -1
+			 && regs[subexp_idx].rm_eo != -1);
+	  if (reg_set)
+	    naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
+	  if (fs != NULL)
+	    {
+	      if (!reg_set)
+		return -1;
+	      else if (naccepted)
+		{
+		  char *buf = (char *) re_string_get_buffer (&mctx->input);
+		  /* Never compare beyond the valid part of the buffer.  */
+		  if (mctx->input.valid_len - *pidx < naccepted
+		      || memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
+				 naccepted) != 0)
+		    return -1;
+		}
+	    }
+
+	  if (naccepted == 0)
+	    {
+	      int dest_node;
+	      err = re_node_set_insert (eps_via_nodes, node);
+	      if (BE (err < 0, 0))
+		return -2;
+	      dest_node = dfa->edests[node].elems[0];
+	      if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
+					dest_node))
+		return dest_node;
+	    }
+	}
+
+      if (naccepted != 0
+	  || check_node_accept (mctx, dfa->nodes + node, *pidx))
+	{
+	  int dest_node = dfa->nexts[node];
+	  *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
+	  if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
+		     || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
+					       dest_node)))
+	    return -1;
+	  re_node_set_empty (eps_via_nodes);
+	  return dest_node;
+	}
+    }
+  return -1;
+}
+
+/* Save a backtracking point on FS.  FS->num is advanced only after the new
+   entry owns its register copy, so a failed push leaves FS consistent.  */
+static reg_errcode_t
+internal_function
+push_fail_stack (struct re_fail_stack_t *fs, int str_idx, int dest_node,
+		 int nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
+{
+  int num = fs->num;
+  if (num + 1 == fs->alloc)
+    {
+      struct re_fail_stack_ent_t *new_array
+	= realloc (fs->stack, sizeof *new_array * fs->alloc * 2);
+      if (new_array == NULL)
+	return REG_ESPACE;
+      fs->alloc *= 2;
+      fs->stack = new_array;
+    }
+  fs->stack[num].idx = str_idx;
+  fs->stack[num].node = dest_node;
+  fs->stack[num].regs = re_malloc (regmatch_t, nregs);
+  if (fs->stack[num].regs == NULL)
+    return REG_ESPACE;
+  fs->num = num + 1;
+  memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
+  return re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
+}
+
+/* Restore *PIDX, REGS and *EPS_VIA_NODES from the newest entry of FS.  */
+static int internal_function
+pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs,
+		regmatch_t *regs, re_node_set *eps_via_nodes)
+{
+  int top = --fs->num;	/* Index of the entry being restored.  */
+  assert (top >= 0);
+  re_node_set_free (eps_via_nodes);	/* Dropped before being overwritten.  */
+  *eps_via_nodes = fs->stack[top].eps_via_nodes;
+  memcpy (regs, fs->stack[top].regs, sizeof (regmatch_t) * nregs);
+  re_free (fs->stack[top].regs);
+  *pidx = fs->stack[top].idx;
+  return fs->stack[top].node;
+}
+
+/* Set the positions where the subexpressions start/end to the registers
+   PMATCH, walking the NFA from the initial node along the state log.
+   Note: We assume that pmatch[0] is already set, and
+   pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch.
+   FL_BACKTRACK enables a fail stack of alternative epsilon transitions.
+   Return REG_NOERROR, REG_NOMATCH if no path ends the walk properly, or
+   REG_ESPACE if memory is exhausted.  */
+
+static reg_errcode_t
+internal_function
+set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
+	  regmatch_t *pmatch, int fl_backtrack)
+{
+  const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
+  int idx, cur_node;
+  re_node_set eps_via_nodes;
+  struct re_fail_stack_t *fs;
+  struct re_fail_stack_t fs_body = { 0, 2, NULL };
+  regmatch_t *prev_idx_match;
+  int prev_idx_match_malloced = 0;
+  reg_errcode_t ret = REG_NOERROR;
+
+#ifdef DEBUG
+  assert (nmatch > 1);
+  assert (mctx->state_log != NULL);
+#endif
+  if (fl_backtrack)
+    {
+      fs = &fs_body;
+      fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
+      if (fs->stack == NULL)
+	return REG_ESPACE;
+    }
+  else
+    fs = NULL;
+
+  cur_node = dfa->init_node;
+  re_node_set_init_empty (&eps_via_nodes);
+
+  if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
+    prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
+  else
+    {
+      prev_idx_match = re_malloc (regmatch_t, nmatch);
+      if (prev_idx_match == NULL)
+	{
+	  free_fail_stack_return (fs);
+	  return REG_ESPACE;
+	}
+      prev_idx_match_malloced = 1;
+    }
+  memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
+
+  for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
+    {
+      update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
+
+      if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
+	{
+	  int reg_idx;
+
+	  /* Without backtracking the first arrival at the halt node wins.  */
+	  if (fs == NULL)
+	    break;
+	  /* With backtracking, accept only if no subexpression was opened
+	     without being closed; otherwise try a saved alternative.  */
+	  for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
+	    if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
+	      break;
+	  if (reg_idx == nmatch)
+	    break;
+	  /* Popping an empty fail stack would index before its start, so
+	     running out of alternatives is reported as a failed match.  */
+	  if (fs->num == 0)
+	    {
+	      ret = REG_NOMATCH;
+	      break;
+	    }
+	  cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
+				     &eps_via_nodes);
+	}
+
+      /* Proceed to next node.  */
+      cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
+				    &eps_via_nodes, fs);
+
+      if (BE (cur_node < 0, 0))
+	{
+	  /* -2 signals memory exhaustion; -1 means this path is dead.  */
+	  if (BE (cur_node == -2, 0))
+	    {
+	      ret = REG_ESPACE;
+	      break;
+	    }
+	  if (fs == NULL || fs->num == 0)
+	    {
+	      ret = REG_NOMATCH;
+	      break;
+	    }
+	  cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
+				     &eps_via_nodes);
+	}
+    }
+
+  /* Single exit: release everything acquired above.  */
+  re_node_set_free (&eps_via_nodes);
+  if (prev_idx_match_malloced)
+    re_free (prev_idx_match);
+  free_fail_stack_return (fs);
+  return ret;
+}
+
+/* Release every entry of FS and its array; FS may be NULL.  */
+static reg_errcode_t
+internal_function
+free_fail_stack_return (struct re_fail_stack_t *fs)
+{
+  struct re_fail_stack_ent_t *ent;
+  if (fs == NULL)
+    return REG_NOERROR;
+  for (ent = fs->stack; ent < fs->stack + fs->num; ++ent)
+    {
+      re_node_set_free (&ent->eps_via_nodes);
+      re_free (ent->regs);
+    }
+  re_free (fs->stack);
+  return REG_NOERROR;
+}
+
+/* Record in PMATCH the effect of visiting CUR_NODE at string index CUR_IDX.
+   PREV_IDX_MATCH keeps the last accepted register snapshot so that an
+   empty iteration of an optional subexpression can be undone.  */
+static void
+internal_function
+update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
+	     regmatch_t *prev_idx_match, int cur_node, int cur_idx, int nmatch)
+{
+  const int kind = dfa->nodes[cur_node].type;
+  const size_t all_regs = sizeof (regmatch_t) * nmatch;
+  int reg;
+
+  /* Only subexpression boundaries touch the registers.  */
+  if (kind != OP_OPEN_SUBEXP && kind != OP_CLOSE_SUBEXP)
+    return;
+  reg = dfa->nodes[cur_node].opr.idx + 1;
+  if (reg >= nmatch)
+    return;
+
+  if (kind == OP_OPEN_SUBEXP)
+    {
+      /* We are at the first node of this sub expression.  */
+      pmatch[reg].rm_so = cur_idx;
+      pmatch[reg].rm_eo = -1;
+    }
+  else if (pmatch[reg].rm_so < cur_idx)
+    {
+      /* This is a non-empty match or we are not inside an optional
+	 subexpression.  Accept this right away.  */
+      pmatch[reg].rm_eo = cur_idx;
+      memcpy (prev_idx_match, pmatch, all_regs);
+    }
+  else if (dfa->nodes[cur_node].opt_subexp
+	   && prev_idx_match[reg].rm_so != -1)
+    {
+      /* An optional subexpression, like (a?)*, matched the empty string
+	 and this is not its first match.  Restore the old registers so
+	 that matches of an inner subexpression are undone as well, like
+	 in ((a?))*.  */
+      memcpy (pmatch, prev_idx_match, all_regs);
+    }
+  else
+    {
+      /* The subexpression is complete, but it may be part of an optional
+	 one, so PREV_IDX_MATCH is not updated.  */
+      pmatch[reg].rm_eo = cur_idx;
+    }
+}
+
+/* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
+   and sifts the nodes in each state according to the following rules.
+   The sifted states are stored in SCTX->sifted_states.
+
+   Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
+     1. When STR_IDX == MATCH_LAST(the last index in the state_log):
+	If `a' isn't the LAST_NODE and `a' can't epsilon transit to
+	the LAST_NODE, we throw away the node `a'.
+     2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
+	string `s' and transits to `b':
+	i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
+	   away the node `a'.
+	ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
+	    thrown away, we throw away the node `a'.
+     3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transits to 'b':
+	i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
+	   node `a'.
+	ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
+	    we throw away the node `a'.  */
+/* True if STATE is non-NULL and its node set contains NODE.  */
+#define STATE_NODE_CONTAINS(state,node) \
+  ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
+
+static reg_errcode_t
+internal_function
+sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
+{
+  reg_errcode_t err;
+  int null_cnt = 0;	/* Consecutive NULL sifted states seen so far.  */
+  int str_idx = sctx->last_str_idx;
+  re_node_set cur_dest;	/* Nodes kept for the index being processed.  */
+
+#ifdef DEBUG
+  assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
+#endif
+
+  /* Build sifted state_log[str_idx].  It has the nodes which can epsilon
+     transit to the last_node and the last_node itself.  */
+  err = re_node_set_init_1 (&cur_dest, sctx->last_node);
+  if (BE (err != REG_NOERROR, 0))
+    return err;
+  err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
+  if (BE (err != REG_NOERROR, 0))
+    goto free_return;
+
+  /* Then check each state in the state_log, from the end toward 0.  */
+  while (str_idx > 0)
+    {
+      /* Update counters.  */
+      null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
+      if (null_cnt > mctx->max_mb_elem_len)
+	{
+	  /* Too many empty states in a row: clear the remaining sifted
+	     states below STR_IDX and stop without reporting an error.  */
+	  memset (sctx->sifted_states, '\0',
+		  sizeof (re_dfastate_t *) * str_idx);
+	  re_node_set_free (&cur_dest);
+	  return REG_NOERROR;
+	}
+      re_node_set_empty (&cur_dest);
+      --str_idx;
+
+      if (mctx->state_log[str_idx])
+	{
+	  err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
+          if (BE (err != REG_NOERROR, 0))
+	    goto free_return;
+	}
+
+      /* Add all the nodes which satisfy the following conditions:
+	 - It can epsilon transit to a node in CUR_DEST.
+	 - It is in CUR_SRC.
+	 And update state_log.  */
+      err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
+      if (BE (err != REG_NOERROR, 0))
+	goto free_return;
+    }
+  err = REG_NOERROR;
+ free_return:
+  re_node_set_free (&cur_dest);
+  return err;
+}
+
+static reg_errcode_t
+internal_function
+build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
+		     int str_idx, re_node_set *cur_dest)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
+  int i;
+
+  /* Collect into `cur_dest' the nodes of `cur_src' that accept input at
+     `str_idx' and lead to a node kept in a later sifted state; the caller
+     then updates `sifted_states[str_idx]' with `cur_dest'.
+     Note:
+     `cur_dest' is expected to arrive empty (the caller clears it).
+     `cur_src' points to the node_set of the old `state_log[str_idx]'
+     (with the epsilon nodes pre-filtered out).  */
+  for (i = 0; i < cur_src->nelem; i++)
+    {
+      int prev_node = cur_src->elems[i];
+      int naccepted = 0;	/* Stays 0 while PREV_NODE is not accepted.  */
+      int ret;
+
+#ifdef DEBUG
+      re_token_type_t type = dfa->nodes[prev_node].type;
+      assert (!IS_EPSILON_NODE (type));
+#endif
+#ifdef RE_ENABLE_I18N
+      /* If the node may accept `multi byte'.  */
+      if (dfa->nodes[prev_node].accept_mb)
+	naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
+					 str_idx, sctx->last_str_idx);
+#endif /* RE_ENABLE_I18N */
+
+      /* We don't check backreferences here.
+	 See update_cur_sifted_state().  */
+      if (!naccepted
+	  && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
+	  && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
+				  dfa->nexts[prev_node]))
+	naccepted = 1;
+
+      if (naccepted == 0)
+	continue;	/* PREV_NODE cannot lead to a kept node.  */
+
+      if (sctx->limits.nelem)
+	{
+	  int to_idx = str_idx + naccepted;
+	  if (check_dst_limits (mctx, &sctx->limits,
+				dfa->nexts[prev_node], to_idx,
+				prev_node, str_idx))
+	    continue;	/* The transition violates a limit in SCTX.  */
+	}
+      ret = re_node_set_insert (cur_dest, prev_node);
+      if (BE (ret == -1, 0))
+	return REG_ESPACE;
+    }
+
+  return REG_NOERROR;
+}
+
+/* Helper functions.  */
+
+/* Call extend_buffers when NEXT_STATE_LOG_IDX lies beyond the input buffers,
+   then zero the state log above its old top up to that index.  */
+static reg_errcode_t
+internal_function
+clean_state_log_if_needed (re_match_context_t *mctx, int next_state_log_idx)
+{
+  const int old_top = mctx->state_log_top;
+  int must_extend = next_state_log_idx >= mctx->input.bufs_len;
+  if (!must_extend && mctx->input.valid_len < mctx->input.len)
+    must_extend = next_state_log_idx >= mctx->input.valid_len;
+  if (must_extend)
+    {
+      reg_errcode_t err = extend_buffers (mctx);
+      if (BE (err != REG_NOERROR, 0))
+	return err;
+    }
+  if (old_top < next_state_log_idx)
+    {
+      memset (mctx->state_log + old_top + 1, '\0',
+	      sizeof (re_dfastate_t *) * (next_state_log_idx - old_top));
+      mctx->state_log_top = next_state_log_idx;
+    }
+  return REG_NOERROR;
+}
+
+/* Fold SRC into DST over NUM slots, uniting node sets where both are set.  */
+static reg_errcode_t internal_function
+merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
+		   re_dfastate_t **src, int num)
+{
+  int slot;
+  for (slot = 0; slot < num; ++slot)
+    {
+      reg_errcode_t err;
+      re_node_set both;
+      if (dst[slot] != NULL && src[slot] != NULL)
+	{
+	  err = re_node_set_init_union (&both, &dst[slot]->nodes,
+					&src[slot]->nodes);
+	  if (BE (err != REG_NOERROR, 0))
+	    return err;
+	  dst[slot] = re_acquire_state (&err, dfa, &both);
+	  re_node_set_free (&both);
+	  if (BE (err != REG_NOERROR, 0))
+	    return err;
+	}
+      else if (dst[slot] == NULL)
+	dst[slot] = src[slot];	/* Empty slot: adopt SRC's state, if any.  */
+    }
+  return REG_NOERROR;
+}
+
+static reg_errcode_t
+internal_function
+update_cur_sifted_state (const re_match_context_t *mctx,
+			 re_sift_context_t *sctx, int str_idx,
+			 re_node_set *dest_nodes)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  reg_errcode_t err = REG_NOERROR;
+  const re_node_set *candidates;	/* Nodes of state_log[STR_IDX], if any.  */
+  candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
+		: &mctx->state_log[str_idx]->nodes);
+  /* Turn DEST_NODES into sifted_states[STR_IDX]; an empty set gives NULL.  */
+  if (dest_nodes->nelem == 0)
+    sctx->sifted_states[str_idx] = NULL;
+  else
+    {
+      if (candidates)
+	{
+	  /* At first, add the nodes which can epsilon transit to a node in
+	     DEST_NODE.  */
+	  err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
+	  if (BE (err != REG_NOERROR, 0))
+	    return err;
+
+	  /* Then, check the limitations in the current sift_context.  */
+	  if (sctx->limits.nelem)
+	    {
+	      err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
+					 mctx->bkref_ents, str_idx);
+	      if (BE (err != REG_NOERROR, 0))
+		return err;
+	    }
+	}
+      /* Record the state that corresponds to the final node set.  */
+      sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
+      if (BE (err != REG_NOERROR, 0))
+	return err;
+    }
+  /* Back references are sifted separately, even if DEST_NODES is empty.  */
+  if (candidates && mctx->state_log[str_idx]->has_backref)
+    {
+      err = sift_states_bkref (mctx, sctx, str_idx, candidates);
+      if (BE (err != REG_NOERROR, 0))
+	return err;
+    }
+  return REG_NOERROR;
+}
+
+/* Add to DEST_NODES the nodes of CANDIDATES that lie in the inverse epsilon
+   closure of DEST_NODES; allocation and merge failures yield REG_ESPACE.  */
+static reg_errcode_t
+internal_function
+add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
+		       const re_node_set *candidates)
+{
+  reg_errcode_t err = REG_NOERROR;
+  int i;
+  re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
+  if (BE (err != REG_NOERROR, 0))
+    return err;
+  if (!state->inveclosure.alloc)
+    {
+      err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
+      for (i = 0; err == REG_NOERROR && i < dest_nodes->nelem; i++)
+	err = re_node_set_merge (&state->inveclosure,
+				 dfa->inveclosures + dest_nodes->elems[i]);
+      if (BE (err != REG_NOERROR, 0))
+        return REG_ESPACE;
+    }
+  return re_node_set_add_intersect (dest_nodes, candidates,
+				    &state->inveclosure);
+}
+
+static reg_errcode_t
+internal_function	/* Drops NODE's inverse epsilon closure from DEST_NODES.  */
+sub_epsilon_src_nodes (const re_dfa_t *dfa, int node, re_node_set *dest_nodes,
+		       const re_node_set *candidates)
+{
+    int ecl_idx;
+    reg_errcode_t err;
+    re_node_set *inv_eclosure = dfa->inveclosures + node;
+    re_node_set except_nodes;	/* Nodes that must stay in DEST_NODES.  */
+    re_node_set_init_empty (&except_nodes);
+    for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
+      {
+	int cur_node = inv_eclosure->elems[ecl_idx];
+	if (cur_node == node)
+	  continue;
+	if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
+	  {
+	    int edst1 = dfa->edests[cur_node].elems[0];
+	    int edst2 = ((dfa->edests[cur_node].nelem > 1)
+			 ? dfa->edests[cur_node].elems[1] : -1);
+	    /* CUR_NODE has an epsilon destination outside NODE's inverse
+	       closure that is still in DEST_NODES: keep its sources.  */
+	    if ((!re_node_set_contains (inv_eclosure, edst1)
+		 && re_node_set_contains (dest_nodes, edst1))
+		|| (edst2 > 0
+		    && !re_node_set_contains (inv_eclosure, edst2)
+		    && re_node_set_contains (dest_nodes, edst2)))
+	      {
+		err = re_node_set_add_intersect (&except_nodes, candidates,
+						 dfa->inveclosures + cur_node);
+		if (BE (err != REG_NOERROR, 0))
+		  {
+		    re_node_set_free (&except_nodes);
+		    return err;
+		  }
+	      }
+	  }
+      }
+    for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
+      {
+	int cur_node = inv_eclosure->elems[ecl_idx];
+	if (!re_node_set_contains (&except_nodes, cur_node))
+	  {
+	    int idx = re_node_set_contains (dest_nodes, cur_node) - 1;
+	    re_node_set_remove_at (dest_nodes, idx);
+	  }
+      }
+    re_node_set_free (&except_nodes);
+    return REG_NOERROR;
+}
+
+/* Return 1 if, for some back-reference entry in LIMITS, SRC_NODE at
+   SRC_IDX and DST_NODE at DST_IDX fall on different sides (before, inside,
+   after) of that entry's subexpression; return 0 otherwise.  */
+static int
+internal_function
+check_dst_limits (const re_match_context_t *mctx, re_node_set *limits,
+		  int dst_node, int dst_idx, int src_node, int src_idx)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  const int dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
+  const int src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
+  int n;
+
+  for (n = 0; n < limits->nelem; ++n)
+    {
+      const int limit = limits->elems[n];
+      const int subexp_idx = dfa->nodes[mctx->bkref_ents[limit].node].opr.idx;
+      int dst_pos, src_pos;
+
+      /* Keep DST before SRC: the helper may update the back-reference cache.  */
+      dst_pos = check_dst_limits_calc_pos (mctx, limit, subexp_idx, dst_node,
+					   dst_idx, dst_bkref_idx);
+      src_pos = check_dst_limits_calc_pos (mctx, limit, subexp_idx, src_node,
+					   src_idx, src_bkref_idx);
+
+      /* Equal positions cover these unrelated layouts:
+	 <src> <dst> ( <subexp> )
+	 ( <subexp> ) <src> <dst>
+	 ( <subexp1> <src> <subexp2> <dst> <subexp3> )
+	 Anything else means the transition violates this limit.  */
+      if (src_pos != dst_pos)
+	return 1;
+    }
+  return 0;
+}
+
+static int
+internal_function
+check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
+			     int subexp_idx, int from_node, int bkref_idx)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  const re_node_set *eclosures = dfa->eclosures + from_node;
+  int node_idx;
+  /* BOUNDARIES: bit 0 = at the subexpression start, bit 1 = at its end.  */
+  /* We are on a boundary: examine the nodes on the epsilon closure of
+     FROM_NODE to decide between the results -1, 0 and 1.  */
+  for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
+    {
+      int node = eclosures->elems[node_idx];
+      switch (dfa->nodes[node].type)
+	{
+	case OP_BACK_REF:
+	  if (bkref_idx != -1)
+	    {
+	      struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
+	      do
+	        {
+		  int dst, cpos;
+
+		  if (ent->node != node)
+		    continue;	/* Jumps to the `while' test below.  */
+
+		  if (subexp_idx < BITSET_WORD_BITS
+		      && !(ent->eps_reachable_subexps_map
+			   & ((bitset_word_t) 1 << subexp_idx)))
+		    continue;
+
+		  /* Recurse trying to reach the OP_OPEN_SUBEXP and
+		     OP_CLOSE_SUBEXP cases below.  But, if the
+		     destination node is the same node as the source
+		     node, don't recurse because it would cause an
+		     infinite loop: a regex that exhibits this behavior
+		     is ()\1*\1*  */
+		  dst = dfa->edests[node].elems[0];
+		  if (dst == from_node)
+		    {
+		      if (boundaries & 1)
+		        return -1;
+		      else /* if (boundaries & 2) */
+		        return 0;
+		    }
+
+		  cpos =
+		    check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
+						 dst, bkref_idx);
+		  if (cpos == -1 /* && (boundaries & 1) */)
+		    return -1;
+		  if (cpos == 0 && (boundaries & 2))
+		    return 0;
+
+		  /* Nothing found via this entry: clear its bit for
+		     SUBEXP_IDX so the bit test above skips it next time.  */
+		  if (subexp_idx < BITSET_WORD_BITS)
+		    ent->eps_reachable_subexps_map
+		      &= ~((bitset_word_t) 1 << subexp_idx);
+	        }
+	      while (ent++->more);
+	    }
+	  break;
+
+	case OP_OPEN_SUBEXP:
+	  if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
+	    return -1;
+	  break;
+
+	case OP_CLOSE_SUBEXP:
+	  if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
+	    return 0;
+	  break;
+
+	default:
+	    break;
+	}
+    }
+  /* No boundary node of SUBEXP_IDX was found in the closure.  */
+  return (boundaries & 2) ? 1 : 0;
+}
+
+/* Classify STR_IDX against the subexpression of back-reference entry LIMIT:
+   -1 before it, 1 after it, 0 strictly inside; on a boundary the epsilon
+   closure of FROM_NODE decides.  */
+static int
+internal_function
+check_dst_limits_calc_pos (const re_match_context_t *mctx, int limit,
+			   int subexp_idx, int from_node, int str_idx,
+			   int bkref_idx)
+{
+  const struct re_backref_cache_entry *lim = &mctx->bkref_ents[limit];
+  int boundaries = 0;
+
+  if (str_idx < lim->subexp_from)
+    return -1;
+  if (str_idx > lim->subexp_to)
+    return 1;
+
+  /* Bit 0 marks the opening boundary, bit 1 the closing one.  */
+  if (str_idx == lim->subexp_from)
+    boundaries |= 1;
+  if (str_idx == lim->subexp_to)
+    boundaries |= 2;
+  return (boundaries == 0 ? 0
+	  : check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
+					 from_node, bkref_idx));
+}
+
+/* Check the limitations of sub expressions LIMITS, and remove the nodes
+   which are against limitations from DEST_NODES.
+
+   Each element of LIMITS is an index into BKREF_ENTS.  An entry is
+   skipped unless STR_IDX lies in the half-open range
+   (ent->subexp_from, ent->str_idx].  CANDIDATES is only forwarded to
+   sub_epsilon_src_nodes().
+
+   Returns REG_NOERROR, or the first error reported by
+   sub_epsilon_src_nodes().  */
+
+static reg_errcode_t
+internal_function
+check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
+		     const re_node_set *candidates, re_node_set *limits,
+		     struct re_backref_cache_entry *bkref_ents, int str_idx)
+{
+  reg_errcode_t err;
+  int node_idx, lim_idx;
+
+  for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
+    {
+      int subexp_idx;
+      struct re_backref_cache_entry *ent;
+      ent = bkref_ents + limits->elems[lim_idx];
+
+      if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
+	continue; /* This is unrelated limitation.  */
+
+      /* Index of the subexpression the back reference refers to.  */
+      subexp_idx = dfa->nodes[ent->node].opr.idx;
+      if (ent->subexp_to == str_idx)
+	{
+	  /* We are exactly at the end of the subexpression: look for its
+	     '(' and ')' nodes among DEST_NODES.  If several match, the
+	     last one found wins.  */
+	  int ops_node = -1;
+	  int cls_node = -1;
+	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
+	    {
+	      int node = dest_nodes->elems[node_idx];
+	      re_token_type_t type = dfa->nodes[node].type;
+	      if (type == OP_OPEN_SUBEXP
+		  && subexp_idx == dfa->nodes[node].opr.idx)
+		ops_node = node;
+	      else if (type == OP_CLOSE_SUBEXP
+		       && subexp_idx == dfa->nodes[node].opr.idx)
+		cls_node = node;
+	    }
+
+	  /* Check the limitation of the open subexpression.  */
+	  /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */
+	  if (ops_node >= 0)
+	    {
+	      err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
+					   candidates);
+	      if (BE (err != REG_NOERROR, 0))
+		return err;
+	    }
+
+	  /* Check the limitation of the close subexpression.  */
+	  if (cls_node >= 0)
+	    for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
+	      {
+		int node = dest_nodes->elems[node_idx];
+		/* Keep NODE only if CLS_NODE appears in NODE's inverse
+		   epsilon closure or in its epsilon closure.  */
+		if (!re_node_set_contains (dfa->inveclosures + node,
+					   cls_node)
+		    && !re_node_set_contains (dfa->eclosures + node,
+					      cls_node))
+		  {
+		    /* It is against this limitation.
+		       Remove it from the current sifted state.  */
+		    err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
+						 candidates);
+		    if (BE (err != REG_NOERROR, 0))
+		      return err;
+		    /* Revisit the same index on the next iteration;
+		       presumably the removal shifted the remaining
+		       elements down -- confirm against
+		       sub_epsilon_src_nodes().  */
+		    --node_idx;
+		  }
+	      }
+	}
+      else /* (ent->subexp_to != str_idx)  */
+	{
+	  /* Strictly inside the range: every '(' or ')' node of this
+	     subexpression is dropped.  */
+	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
+	    {
+	      int node = dest_nodes->elems[node_idx];
+	      re_token_type_t type = dfa->nodes[node].type;
+	      if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
+		{
+		  if (subexp_idx != dfa->nodes[node].opr.idx)
+		    continue;
+		  /* It is against this limitation.
+		     Remove it from the current sifted state.  */
+		  err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
+					       candidates);
+		  if (BE (err != REG_NOERROR, 0))
+		    return err;
+		}
+	    }
+	}
+    }
+  return REG_NOERROR;
+}
+
+/* For every OP_BACK_REF node in CANDIDATES, walk the back-reference
+   cache entries recorded for STR_IDX and, for each entry whose
+   destination is still present in SCTX->sifted_states and passes
+   check_dst_limits(), run sift_states_backward() on a local copy of the
+   sift context with that entry added to its limits.  When
+   SCTX->limited_states is non-NULL the result is merged into it.
+
+   Returns REG_NOERROR on success (also when no cache entry exists for
+   STR_IDX), REG_ESPACE when inserting into the limit set fails, or the
+   error of a failing helper.  */
+static reg_errcode_t
+internal_function
+sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
+		   int str_idx, const re_node_set *candidates)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  reg_errcode_t err;
+  int node_idx, node;
+  re_sift_context_t local_sctx;
+  /* Index of the first cache entry for STR_IDX, or -1 if there is none.  */
+  int first_idx = search_cur_bkref_entry (mctx, str_idx);
+
+  if (first_idx == -1)
+    return REG_NOERROR;
+
+  local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.  */
+
+  for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
+    {
+      int enabled_idx;
+      re_token_type_t type;
+      struct re_backref_cache_entry *entry;
+      node = candidates->elems[node_idx];
+      type = dfa->nodes[node].type;
+      /* Avoid infinite loop for the REs like "()\1+".  */
+      if (node == sctx->last_node && str_idx == sctx->last_str_idx)
+	continue;
+      if (type != OP_BACK_REF)
+	continue;
+
+      /* ENTRY and ENABLED_IDX advance in lock step; note that every
+	 `continue' below jumps to the loop condition, which performs
+	 that advance.  */
+      entry = mctx->bkref_ents + first_idx;
+      enabled_idx = first_idx;
+      do
+	{
+	  int subexp_len;
+	  int to_idx;
+	  int dst_node;
+	  int ret;
+	  re_dfastate_t *cur_state;
+
+	  if (entry->node != node)
+	    continue;
+	  subexp_len = entry->subexp_to - entry->subexp_from;
+	  to_idx = str_idx + subexp_len;
+	  /* An empty match follows the epsilon edge, a non-empty one the
+	     regular successor.  */
+	  dst_node = (subexp_len ? dfa->nexts[node]
+		      : dfa->edests[node].elems[0]);
+
+	  /* Skip the entry when its destination is out of range, already
+	     sifted away, or rejected by the current limits.  */
+	  if (to_idx > sctx->last_str_idx
+	      || sctx->sifted_states[to_idx] == NULL
+	      || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
+	      || check_dst_limits (mctx, &sctx->limits, node,
+				   str_idx, dst_node, to_idx))
+	    continue;
+
+	  if (local_sctx.sifted_states == NULL)
+	    {
+	      /* First use: shallow-copy SCTX, then give the copy its own
+		 limit set so that it can be modified independently.  */
+	      local_sctx = *sctx;
+	      err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
+	      if (BE (err != REG_NOERROR, 0))
+		goto free_return;
+	    }
+	  local_sctx.last_node = node;
+	  local_sctx.last_str_idx = str_idx;
+	  ret = re_node_set_insert (&local_sctx.limits, enabled_idx);
+	  if (BE (ret < 0, 0))
+	    {
+	      err = REG_ESPACE;
+	      goto free_return;
+	    }
+	  /* Remember the state at STR_IDX so it can be put back after the
+	     backward sift.  */
+	  cur_state = local_sctx.sifted_states[str_idx];
+	  err = sift_states_backward (mctx, &local_sctx);
+	  if (BE (err != REG_NOERROR, 0))
+	    goto free_return;
+	  if (sctx->limited_states != NULL)
+	    {
+	      err = merge_state_array (dfa, sctx->limited_states,
+				       local_sctx.sifted_states,
+				       str_idx + 1);
+	      if (BE (err != REG_NOERROR, 0))
+		goto free_return;
+	    }
+	  local_sctx.sifted_states[str_idx] = cur_state;
+	  re_node_set_remove (&local_sctx.limits, enabled_idx);
+
+	  /* mctx->bkref_ents may have changed, reload the pointer.  */
+          entry = mctx->bkref_ents + enabled_idx;
+	}
+      while (enabled_idx++, entry++->more);
+    }
+  err = REG_NOERROR;
+ free_return:
+  /* Only the private limit set belongs to LOCAL_SCTX; its other members
+     were copied from SCTX by plain structure assignment.  */
+  if (local_sctx.sifted_states != NULL)
+    {
+      re_node_set_free (&local_sctx.limits);
+    }
+
+  return err;
+}
+
+
+#ifdef RE_ENABLE_I18N
+/* Ask check_node_accept_bytes() how many bytes node NODE_IDX accepts at
+   STR_IDX.  A positive count whose destination index does not exceed
+   MAX_STR_IDX is reduced to 0 when the sifted state at that destination
+   no longer contains the successor of NODE_IDX.  Every other count is
+   returned as is.  */
+static int
+internal_function
+sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
+                     int node_idx, int str_idx, int max_str_idx)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  const int nbytes = check_node_accept_bytes (dfa, node_idx, &mctx->input,
+                                              str_idx);
+  int dest_idx;
+
+  /* Nothing accepted: there is no destination to validate.  */
+  if (nbytes <= 0)
+    return nbytes;
+
+  /* A destination beyond the limit is not looked up.  */
+  dest_idx = str_idx + nbytes;
+  if (dest_idx > max_str_idx)
+    return nbytes;
+
+  /* The successor survived sifting, so the bytes really are accepted.  */
+  if (STATE_NODE_CONTAINS (sctx->sifted_states[dest_idx],
+                           dfa->nexts[node_idx]))
+    return nbytes;
+
+  /* The destination was already thrown away.  */
+  return 0;
+}
+#endif /* RE_ENABLE_I18N */
+
+
+/* Functions for state transition.  */
+
+/* Compute the state STATE moves to on the current input byte.  The byte
+   is consumed from MCTX->input.  If STATE can accept multibyte input,
+   transit_state_mb() is run first so that it can record those
+   destinations.  The transition table of STATE is built on demand.
+
+   On failure NULL is returned and *ERR holds the reason (REG_ESPACE when
+   the table cannot be built).  A NULL table slot is returned as NULL
+   without touching *ERR.  */
+
+static re_dfastate_t *
+internal_function
+transit_state (reg_errcode_t *err, re_match_context_t *mctx,
+               re_dfastate_t *state)
+{
+  re_dfastate_t **table;
+  unsigned char byte;
+  unsigned int ctx;
+
+#ifdef RE_ENABLE_I18N
+  /* Multibyte-capable states are handled separately first.  */
+  if (BE (state->accept_mb, 0))
+    {
+      *err = transit_state_mb (mctx, state);
+      if (BE (*err != REG_NOERROR, 0))
+        return NULL;
+    }
+#endif /* RE_ENABLE_I18N */
+
+  /* The single byte that selects the table slot.  */
+  byte = re_string_fetch_byte (&mctx->input);
+
+  /* Make sure one of the two table flavours exists.  */
+  while (state->trtable == NULL && state->word_trtable == NULL)
+    if (!build_trtable (mctx->dfa, state))
+      {
+        *err = REG_ESPACE;
+        return NULL;
+      }
+
+  /* Plain table: indexed by the byte alone.  */
+  table = state->trtable;
+  if (table != NULL)
+    return table[byte];
+
+  /* Word-sensitive table: the second half serves word context.  */
+  table = state->word_trtable;
+  ctx = re_string_context_at (&mctx->input,
+                              re_string_cur_idx (&mctx->input) - 1,
+                              mctx->eflags);
+  if (IS_WORD_CONTEXT (ctx))
+    return table[byte + SBC_MAX];
+  return table[byte];
+}
+
+/* Update the state_log if we need.
+
+   Stores NEXT_STATE (which may be NULL) into MCTX->state_log at the
+   current input index.  If a state is already logged there, the logged
+   state and NEXT_STATE are combined into one state built from the union
+   of their entrance nodes.  When the DFA has back references, the
+   resulting state is additionally passed to check_subexp_matching_top()
+   and, if it contains back references, to transit_state_bkref().
+
+   Returns the state finally logged at the current index, or NULL with
+   *ERR set when a helper fails.  */
+re_dfastate_t *
+internal_function
+merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
+		      re_dfastate_t *next_state)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  int cur_idx = re_string_cur_idx (&mctx->input);
+
+  if (cur_idx > mctx->state_log_top)
+    {
+      /* First time this index is reached: log the state and raise the
+	 high-water mark.  */
+      mctx->state_log[cur_idx] = next_state;
+      mctx->state_log_top = cur_idx;
+    }
+  else if (mctx->state_log[cur_idx] == 0)
+    {
+      mctx->state_log[cur_idx] = next_state;
+    }
+  else
+    {
+      re_dfastate_t *pstate;
+      unsigned int context;
+      re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
+      /* If (state_log[cur_idx] != 0), it implies that cur_idx is
+         the destination of a multibyte char/collating element/
+         back reference.  Then the next state is the union set of
+         these destinations and the results of the transition table.  */
+      pstate = mctx->state_log[cur_idx];
+      log_nodes = pstate->entrance_nodes;
+      if (next_state != NULL)
+        {
+          table_nodes = next_state->entrance_nodes;
+          *err = re_node_set_init_union (&next_nodes, table_nodes,
+					     log_nodes);
+          if (BE (*err != REG_NOERROR, 0))
+	    return NULL;
+        }
+      else
+	/* Shallow copy: NEXT_NODES shares its storage with LOG_NODES and
+	   therefore must not be freed below.  */
+        next_nodes = *log_nodes;
+      /* Note: We already add the nodes of the initial state,
+	 then we don't need to add them here.  */
+
+      context = re_string_context_at (&mctx->input,
+				      re_string_cur_idx (&mctx->input) - 1,
+				      mctx->eflags);
+      next_state = mctx->state_log[cur_idx]
+        = re_acquire_state_context (err, dfa, &next_nodes, context);
+      /* We don't need to check errors here, since the return value of
+         this function is next_state and ERR is already set.  */
+
+      /* TABLE_NODES is non-NULL exactly when NEXT_NODES was allocated by
+	 re_node_set_init_union() above.  */
+      if (table_nodes != NULL)
+        re_node_set_free (&next_nodes);
+    }
+
+  if (BE (dfa->nbackref, 0) && next_state != NULL)
+    {
+      /* Check OP_OPEN_SUBEXP in the current state in case that we use them
+	 later.  We must check them here, since the back references in the
+	 next state might use them.  */
+      *err = check_subexp_matching_top (mctx, &next_state->nodes,
+					cur_idx);
+      if (BE (*err != REG_NOERROR, 0))
+	return NULL;
+
+      /* If the next state has back references.  */
+      if (next_state->has_backref)
+	{
+	  *err = transit_state_bkref (mctx, &next_state->nodes);
+	  if (BE (*err != REG_NOERROR, 0))
+	    return NULL;
+	  /* transit_state_bkref() may have replaced the logged state, so
+	     pick up whatever is logged now.  */
+	  next_state = mctx->state_log[cur_idx];
+	}
+    }
+
+  return next_state;
+}
+
+/* Advance the input one byte at a time until an index with a logged
+   state is reached, then let merge_state_with_log() produce the state
+   to restart matching from.  This repeats while that call yields NULL
+   without reporting an error.
+
+   Returns NULL once the scan would move past MCTX->state_log_top;
+   otherwise returns whatever merge_state_with_log() produced last
+   (NULL with *ERR set on failure).  */
+re_dfastate_t *
+internal_function
+find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
+{
+  re_dfastate_t *recovered;
+
+  for (;;)
+    {
+      const int log_top = mctx->state_log_top;
+      int idx = re_string_cur_idx (&mctx->input);
+
+      /* Step forward to the next index that has a logged state.  */
+      for (;;)
+        {
+          ++idx;
+          if (idx > log_top)
+            return NULL;
+          re_string_skip_bytes (&mctx->input, 1);
+          if (mctx->state_log[idx] != NULL)
+            break;
+        }
+
+      recovered = merge_state_with_log (err, mctx, NULL);
+      if (recovered != NULL || *err != REG_NOERROR)
+        break;
+    }
+
+  return recovered;
+}
+
+/* Helper functions for transit_state.  */
+
+/* Scan CUR_NODES for OP_OPEN_SUBEXP nodes whose subexpression index is
+   below BITSET_WORD_BITS and is marked in DFA->used_bkref_map.  Each such
+   node is registered together with STR_IDX through
+   match_ctx_add_subtop(), so that the corresponding back references can
+   be evaluated later.
+
+   Returns REG_NOERROR, or the first error from match_ctx_add_subtop().  */
+
+static reg_errcode_t
+internal_function
+check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
+                           int str_idx)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  int i;
+
+  /* TODO: This isn't efficient.
+     Several OP_OPEN_SUBEXP nodes can share one subexpression index,
+     so every node has to be inspected.
+     E.g. RE: (a){2}  */
+  for (i = 0; i < cur_nodes->nelem; ++i)
+    {
+      const int node = cur_nodes->elems[i];
+      const re_token_t *const tok = &dfa->nodes[node];
+      reg_errcode_t err;
+
+      if (tok->type != OP_OPEN_SUBEXP)
+        continue;
+      /* Indices that do not fit the bitmap word are not tracked.  */
+      if (tok->opr.idx >= BITSET_WORD_BITS)
+        continue;
+      if ((dfa->used_bkref_map & ((bitset_word_t) 1 << tok->opr.idx)) == 0)
+        continue;
+
+      err = match_ctx_add_subtop (mctx, node, str_idx);
+      if (BE (err != REG_NOERROR, 0))
+        return err;
+    }
+  return REG_NOERROR;
+}
+
+#if 0
+/* Return the next state to which the current state STATE will transit by
+   accepting the current input byte.
+
+   NOTE: this definition sits inside an `#if 0' region and is therefore
+   not compiled.  It computes the transition without a transition table:
+   the epsilon closures of the successors of every accepting node are
+   merged and turned into a state.  On failure NULL is returned and *ERR
+   is set.  */
+
+static re_dfastate_t *
+transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
+		  re_dfastate_t *state)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  re_node_set next_nodes;
+  re_dfastate_t *next_state;
+  int node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
+  unsigned int context;
+
+  *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
+  if (BE (*err != REG_NOERROR, 0))
+    return NULL;
+  /* Collect the destinations of every node that accepts the byte at
+     CUR_STR_IDX.  */
+  for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
+    {
+      int cur_node = state->nodes.elems[node_cnt];
+      if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
+	{
+	  *err = re_node_set_merge (&next_nodes,
+				    dfa->eclosures + dfa->nexts[cur_node]);
+	  if (BE (*err != REG_NOERROR, 0))
+	    {
+	      re_node_set_free (&next_nodes);
+	      return NULL;
+	    }
+	}
+    }
+  context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
+  next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
+  /* We don't need to check errors here, since the return value of
+     this function is next_state and ERR is already set.  */
+
+  re_node_set_free (&next_nodes);
+  /* Consume the byte that was just matched.  */
+  re_string_skip_bytes (&mctx->input, 1);
+  return next_state;
+}
+#endif
+
+#ifdef RE_ENABLE_I18N
+/* For every node of PSTATE that can accept multibyte input at the
+   current input position, record the epsilon closure of its successor
+   in MCTX->state_log at the index reached after the accepted bytes.  If
+   a state is already logged there, the two node sets are united.
+   MCTX->max_mb_elem_len is raised to the longest accepted length.
+
+   Returns REG_NOERROR, or the error of the failing helper.  */
+static reg_errcode_t
+internal_function
+transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  reg_errcode_t err;
+  int i;
+
+  for (i = 0; i < pstate->nodes.nelem; ++i)
+    {
+      re_node_set dest_nodes, *new_nodes;
+      int cur_node_idx = pstate->nodes.elems[i];
+      int naccepted, dest_idx;
+      unsigned int context;
+      re_dfastate_t *dest_state;
+
+      if (!dfa->nodes[cur_node_idx].accept_mb)
+        continue;
+
+      /* A constrained node only takes part when the context at the
+	 current position satisfies its constraint.  */
+      if (dfa->nodes[cur_node_idx].constraint)
+	{
+	  context = re_string_context_at (&mctx->input,
+					  re_string_cur_idx (&mctx->input),
+					  mctx->eflags);
+	  if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
+					   context))
+	    continue;
+	}
+
+      /* How many bytes the node can accept?  */
+      naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
+					   re_string_cur_idx (&mctx->input));
+      if (naccepted == 0)
+	continue;
+
+      /* The node can accept `naccepted' bytes.  */
+      dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
+      mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
+			       : mctx->max_mb_elem_len);
+      err = clean_state_log_if_needed (mctx, dest_idx);
+      if (BE (err != REG_NOERROR, 0))
+	return err;
+#ifdef DEBUG
+      assert (dfa->nexts[cur_node_idx] != -1);
+#endif
+      new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
+
+      dest_state = mctx->state_log[dest_idx];
+      if (dest_state == NULL)
+	/* Shallow copy sharing storage with NEW_NODES; it must not be
+	   freed, which is why the free below depends on DEST_STATE.  */
+	dest_nodes = *new_nodes;
+      else
+	{
+	  err = re_node_set_init_union (&dest_nodes,
+					dest_state->entrance_nodes, new_nodes);
+	  if (BE (err != REG_NOERROR, 0))
+	    return err;
+	}
+      context = re_string_context_at (&mctx->input, dest_idx - 1,
+				      mctx->eflags);
+      mctx->state_log[dest_idx]
+	= re_acquire_state_context (&err, dfa, &dest_nodes, context);
+      if (dest_state != NULL)
+	re_node_set_free (&dest_nodes);
+      /* A NULL state is only a failure when an error was reported too.  */
+      if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
+	return err;
+    }
+  return REG_NOERROR;
+}
+#endif /* RE_ENABLE_I18N */
+
+/* For every OP_BACK_REF node in NODES whose constraint (if any) is
+   satisfied at the current input position, call get_subexp() to record
+   the candidate matches, then add the destination node set of each
+   newly recorded cache entry to MCTX->state_log at the index where the
+   back reference would end.  A back reference that matches the empty
+   string and enlarges the current state is followed recursively.
+
+   Returns REG_NOERROR, or the error of the failing helper.  */
+static reg_errcode_t
+internal_function
+transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  reg_errcode_t err;
+  int i;
+  int cur_str_idx = re_string_cur_idx (&mctx->input);
+
+  for (i = 0; i < nodes->nelem; ++i)
+    {
+      int dest_str_idx, prev_nelem, bkc_idx;
+      int node_idx = nodes->elems[i];
+      unsigned int context;
+      const re_token_t *node = dfa->nodes + node_idx;
+      re_node_set *new_dest_nodes;
+
+      /* Check whether `node' is a backreference or not.  */
+      if (node->type != OP_BACK_REF)
+	continue;
+
+      if (node->constraint)
+	{
+	  context = re_string_context_at (&mctx->input, cur_str_idx,
+					  mctx->eflags);
+	  if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
+	    continue;
+	}
+
+      /* `node' is a backreference.
+	 Check the substring which the subexpression matched.  */
+      /* Remember where the cache ended so that only the entries added by
+	 get_subexp() are visited below.  */
+      bkc_idx = mctx->nbkref_ents;
+      err = get_subexp (mctx, node_idx, cur_str_idx);
+      if (BE (err != REG_NOERROR, 0))
+	goto free_return;
+
+      /* And add the epsilon closures (which is `new_dest_nodes') of
+	 the backreference to appropriate state_log.  */
+#ifdef DEBUG
+      assert (dfa->nexts[node_idx] != -1);
+#endif
+      for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
+	{
+	  int subexp_len;
+	  re_dfastate_t *dest_state;
+	  struct re_backref_cache_entry *bkref_ent;
+	  bkref_ent = mctx->bkref_ents + bkc_idx;
+	  if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
+	    continue;
+	  subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
+	  /* An empty match continues along the epsilon edge, a non-empty
+	     one at the regular successor.  */
+	  new_dest_nodes = (subexp_len == 0
+			    ? dfa->eclosures + dfa->edests[node_idx].elems[0]
+			    : dfa->eclosures + dfa->nexts[node_idx]);
+	  dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
+			  - bkref_ent->subexp_from);
+	  context = re_string_context_at (&mctx->input, dest_str_idx - 1,
+					  mctx->eflags);
+	  dest_state = mctx->state_log[dest_str_idx];
+	  /* Size of the current state before the update, used below to
+	     detect whether it grew.  */
+	  prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
+			: mctx->state_log[cur_str_idx]->nodes.nelem);
+	  /* Add `new_dest_node' to state_log.  */
+	  if (dest_state == NULL)
+	    {
+	      mctx->state_log[dest_str_idx]
+		= re_acquire_state_context (&err, dfa, new_dest_nodes,
+					    context);
+	      if (BE (mctx->state_log[dest_str_idx] == NULL
+		      && err != REG_NOERROR, 0))
+		goto free_return;
+	    }
+	  else
+	    {
+	      re_node_set dest_nodes;
+	      err = re_node_set_init_union (&dest_nodes,
+					    dest_state->entrance_nodes,
+					    new_dest_nodes);
+	      if (BE (err != REG_NOERROR, 0))
+		{
+		  re_node_set_free (&dest_nodes);
+		  goto free_return;
+		}
+	      mctx->state_log[dest_str_idx]
+		= re_acquire_state_context (&err, dfa, &dest_nodes, context);
+	      re_node_set_free (&dest_nodes);
+	      if (BE (mctx->state_log[dest_str_idx] == NULL
+		      && err != REG_NOERROR, 0))
+		goto free_return;
+	    }
+	  /* We need to check recursively if the backreference can epsilon
+	     transit.  */
+	  if (subexp_len == 0
+	      && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
+	    {
+	      err = check_subexp_matching_top (mctx, new_dest_nodes,
+					       cur_str_idx);
+	      if (BE (err != REG_NOERROR, 0))
+		goto free_return;
+	      err = transit_state_bkref (mctx, new_dest_nodes);
+	      if (BE (err != REG_NOERROR, 0))
+		goto free_return;
+	    }
+	}
+    }
+  err = REG_NOERROR;
+ free_return:
+  return err;
+}
+
+/* Enumerate all the candidates which the backreference BKREF_NODE can match
+   at BKREF_STR_IDX, and register them by match_ctx_add_entry().
+   Note that we might collect inappropriate candidates here.
+   However, the cost of checking them strictly here is too high, so we
+   delay these checks until prune_impossible_nodes().
+
+   Returns REG_NOERROR on success, which includes the case where
+   BKREF_NODE was already examined at BKREF_STR_IDX and the case where no
+   candidate is found.  Any other error reported by a helper (for
+   example REG_ESPACE) is returned to the caller.  */
+
+static reg_errcode_t
+internal_function
+get_subexp (re_match_context_t *mctx, int bkref_node, int bkref_str_idx)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  int subexp_num, sub_top_idx;
+  const char *buf = (const char *) re_string_get_buffer (&mctx->input);
+  /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX.  */
+  int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
+  if (cache_idx != -1)
+    {
+      const struct re_backref_cache_entry *entry
+	= mctx->bkref_ents + cache_idx;
+      do
+        if (entry->node == bkref_node)
+	  return REG_NOERROR; /* We already checked it.  */
+      while (entry++->more);
+    }
+
+  subexp_num = dfa->nodes[bkref_node].opr.idx;
+
+  /* For each sub expression  */
+  for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
+    {
+      reg_errcode_t err;
+      re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
+      re_sub_match_last_t *sub_last;
+      int sub_last_idx, sl_str, bkref_str_off;
+
+      if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
+	continue; /* It isn't related.  */
+
+      /* SL_STR walks the text matched by the subexpression while
+	 BKREF_STR_OFF walks the text at the back reference.  */
+      sl_str = sub_top->str_idx;
+      bkref_str_off = bkref_str_idx;
+      /* At first, check the last node of sub expressions we already
+	 evaluated.  */
+      for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
+	{
+	  int sl_str_diff;
+	  sub_last = sub_top->lasts[sub_last_idx];
+	  sl_str_diff = sub_last->str_idx - sl_str;
+	  /* The matched string by the sub expression match with the substring
+	     at the back reference?  */
+	  if (sl_str_diff > 0)
+	    {
+	      if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
+		{
+		  /* Not enough chars for a successful match.  */
+		  if (bkref_str_off + sl_str_diff > mctx->input.len)
+		    break;
+
+		  err = clean_state_log_if_needed (mctx,
+						   bkref_str_off
+						   + sl_str_diff);
+		  if (BE (err != REG_NOERROR, 0))
+		    return err;
+		  buf = (const char *) re_string_get_buffer (&mctx->input);
+		}
+	      if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
+		/* We don't need to search this sub expression any more.  */
+		break;
+	    }
+	  bkref_str_off += sl_str_diff;
+	  sl_str += sl_str_diff;
+	  err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
+				bkref_str_idx);
+
+	  /* Reload buf, since the preceding call might have reallocated
+	     the buffer.  */
+	  buf = (const char *) re_string_get_buffer (&mctx->input);
+
+	  if (err == REG_NOMATCH)
+	    continue;
+	  if (BE (err != REG_NOERROR, 0))
+	    return err;
+	}
+
+      /* The loop above was left early: the texts differ, so this
+	 subexpression cannot produce further candidates.  */
+      if (sub_last_idx < sub_top->nlasts)
+	continue;
+      if (sub_last_idx > 0)
+	++sl_str;
+      /* Then, search for the other last nodes of the sub expression.  */
+      for (; sl_str <= bkref_str_idx; ++sl_str)
+	{
+	  int cls_node, sl_str_off;
+	  const re_node_set *nodes;
+	  sl_str_off = sl_str - sub_top->str_idx;
+	  /* The matched string by the sub expression match with the substring
+	     at the back reference?  */
+	  if (sl_str_off > 0)
+	    {
+	      if (BE (bkref_str_off >= mctx->input.valid_len, 0))
+		{
+		  /* If we are at the end of the input, we cannot match.  */
+		  if (bkref_str_off >= mctx->input.len)
+		    break;
+
+		  err = extend_buffers (mctx);
+		  if (BE (err != REG_NOERROR, 0))
+		    return err;
+
+		  buf = (const char *) re_string_get_buffer (&mctx->input);
+		}
+	      if (buf [bkref_str_off++] != buf[sl_str - 1])
+		break; /* We don't need to search this sub expression
+			  any more.  */
+	    }
+	  if (mctx->state_log[sl_str] == NULL)
+	    continue;
+	  /* Does this state have a ')' of the sub expression?  */
+	  nodes = &mctx->state_log[sl_str]->nodes;
+	  cls_node = find_subexp_node (dfa, nodes, subexp_num,
+				       OP_CLOSE_SUBEXP);
+	  if (cls_node == -1)
+	    continue; /* No.  */
+	  if (sub_top->path == NULL)
+	    {
+	      /* calloc takes the element count first, then the size.  */
+	      sub_top->path = calloc (sl_str - sub_top->str_idx + 1,
+				      sizeof (state_array_t));
+	      if (sub_top->path == NULL)
+		return REG_ESPACE;
+	    }
+	  /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
+	     in the current context?  */
+	  err = check_arrival (mctx, sub_top->path, sub_top->node,
+			       sub_top->str_idx, cls_node, sl_str,
+			       OP_CLOSE_SUBEXP);
+	  if (err == REG_NOMATCH)
+	      continue;
+	  if (BE (err != REG_NOERROR, 0))
+	      return err;
+	  sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
+	  if (BE (sub_last == NULL, 0))
+	    return REG_ESPACE;
+	  err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
+				bkref_str_idx);
+
+	  /* Reload buf here as well: the call might have reallocated the
+	     input buffer, and the next iteration reads through buf.  */
+	  buf = (const char *) re_string_get_buffer (&mctx->input);
+
+	  if (err == REG_NOMATCH)
+	    continue;
+	  /* Any other failure (e.g. out of memory) must reach the caller
+	     instead of being dropped silently.  */
+	  if (BE (err != REG_NOERROR, 0))
+	    return err;
+	}
+    }
+  return REG_NOERROR;
+}
+
+/* Helper functions for get_subexp().  */
+
+/* Decide whether the subexpression end SUB_LAST can reach the back
+   reference BKREF_NODE at BKREF_STR.  If it can, the pair described by
+   SUB_TOP and SUB_LAST is recorded with match_ctx_add_entry(), and
+   clean_state_log_if_needed() is called for the index where the back
+   reference would end.
+
+   Returns the first non-REG_NOERROR result of check_arrival() or
+   match_ctx_add_entry(); otherwise the result of
+   clean_state_log_if_needed().  */
+
+static reg_errcode_t
+internal_function
+get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
+                re_sub_match_last_t *sub_last, int bkref_node, int bkref_str)
+{
+  /* Can the subexpression arrive at the back reference?  */
+  reg_errcode_t status = check_arrival (mctx, &sub_last->path,
+                                        sub_last->node, sub_last->str_idx,
+                                        bkref_node, bkref_str,
+                                        OP_OPEN_SUBEXP);
+
+  if (status == REG_NOERROR)
+    status = match_ctx_add_entry (mctx, bkref_node, bkref_str,
+                                  sub_top->str_idx, sub_last->str_idx);
+  if (status != REG_NOERROR)
+    return status;
+
+  /* The back reference spans as many bytes as the subexpression did.  */
+  return clean_state_log_if_needed (mctx,
+                                    bkref_str + sub_last->str_idx
+                                    - sub_top->str_idx);
+}
+
+/* Return the first element of NODES whose token type equals TYPE and
+   whose subexpression index equals SUBEXP_IDX, or -1 if there is none.
+   In this file TYPE is passed as OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP,
+   i.e. the '(' or ')' of a subexpression.
+   TODO: This function isn't efficient...
+	 Several nodes can share both the type and the subexpression
+	 index, so every node has to be inspected.
+	 E.g. RE: (a){2}  */
+
+static int
+internal_function
+find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
+                  int subexp_idx, int type)
+{
+  int pos = 0;
+
+  while (pos < nodes->nelem)
+    {
+      const int candidate = nodes->elems[pos++];
+
+      if (dfa->nodes[candidate].type != type)
+        continue;
+      if (dfa->nodes[candidate].opr.idx == subexp_idx)
+        return candidate;
+    }
+  return -1;
+}
+
+/* Check whether the node TOP_NODE at TOP_STR can arrive to the node
+   LAST_NODE at LAST_STR.  We record the path onto PATH since it will be
+   heavily reused.
+
+   While the path is computed, MCTX->state_log and MCTX->input.cur_idx
+   are temporarily replaced by PATH->array and the scan position.  Both
+   are put back on every exit, including the error exits, so the caller
+   never sees the borrowed state log.
+
+   Return REG_NOERROR if it can arrive, REG_NOMATCH if it cannot, or
+   another error code (e.g. REG_ESPACE) when a helper fails.  */
+
+static reg_errcode_t
+internal_function
+check_arrival (re_match_context_t *mctx, state_array_t *path, int top_node,
+	       int top_str, int last_node, int last_str, int type)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  reg_errcode_t err = REG_NOERROR;
+  int subexp_num, backup_cur_idx, str_idx, null_cnt;
+  re_dfastate_t *cur_state = NULL;
+  re_node_set *cur_nodes, next_nodes;
+  re_dfastate_t **backup_state_log;
+  unsigned int context;
+
+  subexp_num = dfa->nodes[top_node].opr.idx;
+  /* Extend the buffer if we need.  */
+  if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
+    {
+      re_dfastate_t **new_array;
+      int old_alloc = path->alloc;
+      path->alloc += last_str + mctx->max_mb_elem_len + 1;
+      new_array = re_realloc (path->array, re_dfastate_t *, path->alloc);
+      if (BE (new_array == NULL, 0))
+	{
+	  /* MCTX has not been touched yet, a plain return is fine.  */
+	  path->alloc = old_alloc;
+	  return REG_ESPACE;
+	}
+      path->array = new_array;
+      /* Newly added slots start out without a state.  */
+      memset (new_array + old_alloc, '\0',
+	      sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
+    }
+
+  /* Resume where a previous call on PATH stopped, if there was one.  */
+  str_idx = path->next_idx ? path->next_idx : top_str;
+
+  /* Temporary modify MCTX.  From here on, leave only through
+     `restore_mctx'.  */
+  backup_state_log = mctx->state_log;
+  backup_cur_idx = mctx->input.cur_idx;
+  mctx->state_log = path->array;
+  mctx->input.cur_idx = str_idx;
+
+  /* Setup initial node set.  */
+  context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
+  if (str_idx == top_str)
+    {
+      err = re_node_set_init_1 (&next_nodes, top_node);
+      if (BE (err != REG_NOERROR, 0))
+	goto restore_mctx;
+      err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
+      if (BE (err != REG_NOERROR, 0))
+	{
+	  re_node_set_free (&next_nodes);
+	  goto restore_mctx;
+	}
+    }
+  else
+    {
+      cur_state = mctx->state_log[str_idx];
+      if (cur_state && cur_state->has_backref)
+	{
+	  err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
+	  if (BE (err != REG_NOERROR, 0))
+	    goto restore_mctx;
+	}
+      else
+	re_node_set_init_empty (&next_nodes);
+    }
+  if (str_idx == top_str || (cur_state && cur_state->has_backref))
+    {
+      if (next_nodes.nelem)
+	{
+	  err = expand_bkref_cache (mctx, &next_nodes, str_idx,
+				    subexp_num, type);
+	  if (BE (err != REG_NOERROR, 0))
+	    {
+	      re_node_set_free (&next_nodes);
+	      goto restore_mctx;
+	    }
+	}
+      cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
+      if (BE (cur_state == NULL && err != REG_NOERROR, 0))
+	{
+	  re_node_set_free (&next_nodes);
+	  goto restore_mctx;
+	}
+      mctx->state_log[str_idx] = cur_state;
+    }
+
+  /* Advance one index at a time.  The scan stops early once more than
+     MCTX->max_mb_elem_len consecutive indices yielded no state.  */
+  for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
+    {
+      re_node_set_empty (&next_nodes);
+      /* Start from whatever is already logged for the next index.  */
+      if (mctx->state_log[str_idx + 1])
+	{
+	  err = re_node_set_merge (&next_nodes,
+				   &mctx->state_log[str_idx + 1]->nodes);
+	  if (BE (err != REG_NOERROR, 0))
+	    {
+	      re_node_set_free (&next_nodes);
+	      goto restore_mctx;
+	    }
+	}
+      if (cur_state)
+	{
+	  err = check_arrival_add_next_nodes (mctx, str_idx,
+					      &cur_state->non_eps_nodes,
+					      &next_nodes);
+	  if (BE (err != REG_NOERROR, 0))
+	    {
+	      re_node_set_free (&next_nodes);
+	      goto restore_mctx;
+	    }
+	}
+      ++str_idx;
+      if (next_nodes.nelem)
+	{
+	  err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
+	  if (BE (err != REG_NOERROR, 0))
+	    {
+	      re_node_set_free (&next_nodes);
+	      goto restore_mctx;
+	    }
+	  err = expand_bkref_cache (mctx, &next_nodes, str_idx,
+				    subexp_num, type);
+	  if (BE (err != REG_NOERROR, 0))
+	    {
+	      re_node_set_free (&next_nodes);
+	      goto restore_mctx;
+	    }
+	}
+      context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
+      cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
+      if (BE (cur_state == NULL && err != REG_NOERROR, 0))
+	{
+	  re_node_set_free (&next_nodes);
+	  goto restore_mctx;
+	}
+      mctx->state_log[str_idx] = cur_state;
+      null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
+    }
+  re_node_set_free (&next_nodes);
+  /* MCTX->state_log still refers to PATH->array at this point.  */
+  cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
+	       : &mctx->state_log[last_str]->nodes);
+  path->next_idx = str_idx;
+
+  /* Then check the current node set has the node LAST_NODE.  */
+  if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
+    err = REG_NOERROR;
+  else
+    err = REG_NOMATCH;
+
+ restore_mctx:
+  /* Fix MCTX.  */
+  mctx->state_log = backup_state_log;
+  mctx->input.cur_idx = backup_cur_idx;
+
+  return err;
+}
+
+/* Helper functions for check_arrival.  */
+
+/* For each node of CUR_NODES that accepts the input at STR_IDX, insert its
+   successor (DFA->nexts) into NEXT_NODES.  Returns REG_NOERROR on success,
+   or the error of the node-set / state operation that failed.
+   TODO: This function is similar to the functions transit_state*(), but
+	 it does a lot of additional work.  Can't we unify them?  */
+
+static reg_errcode_t
+internal_function
+check_arrival_add_next_nodes (re_match_context_t *mctx, int str_idx,
+			      re_node_set *cur_nodes, re_node_set *next_nodes)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  int result;
+  int cur_idx;
+  reg_errcode_t err = REG_NOERROR;
+  re_node_set union_set; /* Scratch set, filled only in the I18N path.  */
+  re_node_set_init_empty (&union_set);
+  for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
+    {
+      int naccepted = 0; /* Bytes accepted by check_node_accept_bytes.  */
+      int cur_node = cur_nodes->elems[cur_idx];
+#ifdef DEBUG
+      re_token_type_t type = dfa->nodes[cur_node].type;
+      assert (!IS_EPSILON_NODE (type));
+#endif
+#ifdef RE_ENABLE_I18N
+      /* If the node may accept a multi-byte sequence.  */
+      if (dfa->nodes[cur_node].accept_mb)
+	{
+	  naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
+					       str_idx);
+	  if (naccepted > 1)
+	    {
+	      re_dfastate_t *dest_state;
+	      int next_node = dfa->nexts[cur_node];
+	      int next_idx = str_idx + naccepted; /* Just past the match.  */
+	      dest_state = mctx->state_log[next_idx];
+	      re_node_set_empty (&union_set);
+	      if (dest_state)
+		{
+		  err = re_node_set_merge (&union_set, &dest_state->nodes);
+		  if (BE (err != REG_NOERROR, 0))
+		    {
+		      re_node_set_free (&union_set);
+		      return err;
+		    }
+		}
+	      result = re_node_set_insert (&union_set, next_node);
+	      if (BE (result < 0, 0))
+		{
+		  re_node_set_free (&union_set);
+		  return REG_ESPACE;
+		}
+	      mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
+							    &union_set);
+	      if (BE (mctx->state_log[next_idx] == NULL
+		      && err != REG_NOERROR, 0))
+		{
+		  re_node_set_free (&union_set);
+		  return err;
+		}
+	    }
+	}
+#endif /* RE_ENABLE_I18N */
+      if (naccepted
+	  || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
+	{
+	  result = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
+	  if (BE (result < 0, 0))
+	    {
+	      re_node_set_free (&union_set);
+	      return REG_ESPACE;
+	    }
+	}
+    }
+  re_node_set_free (&union_set);
+  return REG_NOERROR;
+}
+
+/* Replace CUR_NODES by the union of the epsilon closures of its members.
+   The closure of a member is merged as is when find_subexp_node returns
+   -1 for it; otherwise check_arrival_expand_ecl_sub rebuilds it step by
+   step, so that the walk stops at the nodes of kind TYPE whose sub
+   expression index is EX_SUBEXP.
+
+   DFA       - compiled pattern supplying the eclosures table.
+   CUR_NODES - node set to expand; replaced in place on success.
+   EX_SUBEXP - number of the sub expression of interest.
+   TYPE      - token type of the boundary nodes to look for.
+
+   Returns REG_NOERROR on success; on failure the error is returned and
+   CUR_NODES is left untouched.  */
+
+static reg_errcode_t
+internal_function
+check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
+			  int ex_subexp, int type)
+{
+  re_node_set closure_union;
+  reg_errcode_t status;
+  int pos;
+#ifdef DEBUG
+  assert (cur_nodes->nelem);
+#endif
+  status = re_node_set_alloc (&closure_union, cur_nodes->nelem);
+  if (BE (status != REG_NOERROR, 0))
+    return status;
+
+  /* Accumulate, node by node, the (possibly filtered) epsilon closure.  */
+  for (pos = 0; pos < cur_nodes->nelem; ++pos)
+    {
+      int node = cur_nodes->elems[pos];
+      const re_node_set *closure = dfa->eclosures + node;
+
+      if (find_subexp_node (dfa, closure, ex_subexp, type) == -1)
+	/* No problematic node in this closure: take it wholesale.  */
+	status = re_node_set_merge (&closure_union, closure);
+      else
+	/* A problematic node is present: walk the closure incrementally.  */
+	status = check_arrival_expand_ecl_sub (dfa, &closure_union, node,
+					       ex_subexp, type);
+      if (BE (status != REG_NOERROR, 0))
+	{
+	  re_node_set_free (&closure_union);
+	  return status;
+	}
+    }
+
+  /* Hand the freshly built set over to the caller.  */
+  re_node_set_free (cur_nodes);
+  *cur_nodes = closure_union;
+  return REG_NOERROR;
+}
+
+/* Helper function for check_arrival_expand_ecl.
+   Starting at TARGET, follow the epsilon destinations (DFA->edests) and
+   insert every node visited into DST_NODES.  A branch ends at a node with
+   no epsilon destination, at a node already in DST_NODES, or at a node of
+   kind TYPE whose sub expression index is EX_SUBEXP.  */
+
+static reg_errcode_t
+internal_function
+check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
+			      int target, int ex_subexp, int type)
+{
+  int node = target;
+
+  /* Stop as soon as we reach a node that is already in DST_NODES.  */
+  while (!re_node_set_contains (dst_nodes, node))
+    {
+      const re_token_t *tok = dfa->nodes + node;
+      const re_node_set *succ = dfa->edests + node;
+      int at_boundary = tok->type == type && tok->opr.idx == ex_subexp;
+
+      /* The boundary node itself is kept only for OP_CLOSE_SUBEXP.  */
+      if (!at_boundary || type == OP_CLOSE_SUBEXP)
+	{
+	  if (BE (re_node_set_insert (dst_nodes, node) == -1, 0))
+	    return REG_ESPACE;
+	}
+      if (at_boundary || succ->nelem == 0)
+	break;
+
+      /* Two destinations: recurse into the second, loop on the first.  */
+      if (succ->nelem == 2)
+	{
+	  reg_errcode_t sub
+	    = check_arrival_expand_ecl_sub (dfa, dst_nodes, succ->elems[1],
+					    ex_subexp, type);
+	  if (BE (sub != REG_NOERROR, 0))
+	    return sub;
+	}
+      node = succ->elems[0];
+    }
+  return REG_NOERROR;
+}
+
+
+/* For each entry of MCTX->bkref_ents found for CUR_STR whose node is in
+   CUR_NODES, add the back reference's destination either to CUR_NODES
+   (zero-length reference) or to the state in MCTX->state_log.  */
+
+static reg_errcode_t
+internal_function
+expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
+		    int cur_str, int subexp_num, int type)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  reg_errcode_t err;
+  int cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
+  struct re_backref_cache_entry *ent;
+
+  if (cache_idx_start == -1) /* No cache entry to process.  */
+    return REG_NOERROR;
+
+ restart:
+  ent = mctx->bkref_ents + cache_idx_start;
+  do
+    {
+      int to_idx, next_node;
+
+      /* Is this entry ENT appropriate?  */
+      if (!re_node_set_contains (cur_nodes, ent->node))
+	continue; /* No.  */
+
+      to_idx = cur_str + ent->subexp_to - ent->subexp_from;
+      /* Calculate the destination of the back reference, and append it
+	 to MCTX->STATE_LOG.  */
+      if (to_idx == cur_str)
+	{
+	  /* The backreference did an epsilon transit, we must re-check all
+	     the nodes in the current state.  */
+	  re_node_set new_dests;
+	  reg_errcode_t err2, err3;
+	  next_node = dfa->edests[ent->node].elems[0];
+	  if (re_node_set_contains (cur_nodes, next_node))
+	    continue;
+	  err = re_node_set_init_1 (&new_dests, next_node);
+	  err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
+	  err3 = re_node_set_merge (cur_nodes, &new_dests);
+	  re_node_set_free (&new_dests);
+	  if (BE (err != REG_NOERROR || err2 != REG_NOERROR
+		  || err3 != REG_NOERROR, 0))
+	    {
+	      err = (err != REG_NOERROR ? err
+		     : (err2 != REG_NOERROR ? err2 : err3));
+	      return err;
+	    }
+	  /* TODO: It is still inefficient...  */
+	  goto restart; /* CUR_NODES grew: rescan from the first entry.  */
+	}
+      else
+	{
+	  re_node_set union_set;
+	  next_node = dfa->nexts[ent->node];
+	  if (mctx->state_log[to_idx])
+	    {
+	      int ret;
+	      if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
+					next_node))
+		continue;
+	      err = re_node_set_init_copy (&union_set,
+					   &mctx->state_log[to_idx]->nodes);
+	      ret = re_node_set_insert (&union_set, next_node);
+	      if (BE (err != REG_NOERROR || ret < 0, 0))
+		{
+		  re_node_set_free (&union_set);
+		  err = err != REG_NOERROR ? err : REG_ESPACE;
+		  return err;
+		}
+	    }
+	  else
+	    {
+	      err = re_node_set_init_1 (&union_set, next_node);
+	      if (BE (err != REG_NOERROR, 0))
+		return err;
+	    }
+	  mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
+	  re_node_set_free (&union_set);
+	  if (BE (mctx->state_log[to_idx] == NULL
+		  && err != REG_NOERROR, 0))
+	    return err;
+	}
+    }
+  while (ent++->more); /* Step to the next entry while `more' is set.  */
+  return REG_NOERROR;
+}
+
+/* Build the transition table of STATE (state->trtable or, when needed,
+   state->word_trtable).  Return 1 on success, 0 on failure.  */
+
+static int
+internal_function
+build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
+{
+  reg_errcode_t err;
+  int i, j, ch, need_word_trtable = 0;
+  bitset_word_t elem, mask;
+  bool dests_node_malloced = false;
+  bool dest_states_malloced = false;
+  int ndests; /* Number of the destination states from `state'.  */
+  re_dfastate_t **trtable;
+  re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
+  re_node_set follows, *dests_node;
+  bitset_t *dests_ch;
+  bitset_t acceptable;
+
+  struct dests_alloc
+  {
+    re_node_set dests_node[SBC_MAX];
+    bitset_t dests_ch[SBC_MAX];
+  } *dests_alloc;
+
+  /* We build DFA states which corresponds to the destination nodes
+     from `state'.  `dests_node[i]' represents the nodes which i-th
+     destination state contains, and `dests_ch[i]' represents the
+     characters which i-th destination state accepts.  */
+  if (__libc_use_alloca (sizeof (struct dests_alloc)))
+    dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
+  else
+    {
+      dests_alloc = re_malloc (struct dests_alloc, 1);
+      if (BE (dests_alloc == NULL, 0))
+	return 0;
+      dests_node_malloced = true;
+    }
+  dests_node = dests_alloc->dests_node;
+  dests_ch = dests_alloc->dests_ch;
+
+  /* Initialize transition table.  */
+  state->word_trtable = state->trtable = NULL;
+
+  /* At first, group all nodes belonging to `state' into several
+     destinations.  */
+  ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
+  if (BE (ndests <= 0, 0))
+    {
+      if (dests_node_malloced)
+	free (dests_alloc);
+      /* Return 0 in case of an error (ndests < 0 or calloc failure).  */
+      if (ndests == 0)
+	{
+	  state->trtable = (re_dfastate_t **)
+	    calloc (sizeof (re_dfastate_t *), SBC_MAX);
+	  return state->trtable != NULL;
+	}
+      return 0;
+    }
+
+  err = re_node_set_alloc (&follows, ndests + 1);
+  if (BE (err != REG_NOERROR, 0))
+    goto out_free;
+
+  if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
+			 + ndests * 3 * sizeof (re_dfastate_t *)))
+    dest_states = (re_dfastate_t **)
+      alloca (ndests * 3 * sizeof (re_dfastate_t *));
+  else
+    {
+      dest_states = (re_dfastate_t **)
+	malloc (ndests * 3 * sizeof (re_dfastate_t *));
+      if (BE (dest_states == NULL, 0))
+	{
+out_free: /* Shared failure exit: release everything acquired so far.  */
+	  if (dest_states_malloced)
+	    free (dest_states);
+	  re_node_set_free (&follows);
+	  for (i = 0; i < ndests; ++i)
+	    re_node_set_free (dests_node + i);
+	  if (dests_node_malloced)
+	    free (dests_alloc);
+	  return 0;
+	}
+      dest_states_malloced = true;
+    }
+  dest_states_word = dest_states + ndests;
+  dest_states_nl = dest_states_word + ndests;
+  bitset_empty (acceptable);
+
+  /* Then build the states for all destinations.  */
+  for (i = 0; i < ndests; ++i)
+    {
+      int next_node;
+      re_node_set_empty (&follows);
+      /* Merge the follows of this destination states.  */
+      for (j = 0; j < dests_node[i].nelem; ++j)
+	{
+	  next_node = dfa->nexts[dests_node[i].elems[j]];
+	  if (next_node != -1)
+	    {
+	      err = re_node_set_merge (&follows, dfa->eclosures + next_node);
+	      if (BE (err != REG_NOERROR, 0))
+		goto out_free;
+	    }
+	}
+      dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
+      if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
+	goto out_free;
+      /* If the new state has context constraint, build appropriate states
+	 for these contexts.  The state may be NULL without an error.  */
+      if (dest_states[i] != NULL && dest_states[i]->has_constraint)
+	{
+	  dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
+							  CONTEXT_WORD);
+	  if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
+	    goto out_free;
+
+	  if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
+	    need_word_trtable = 1;
+
+	  dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
+							CONTEXT_NEWLINE);
+	  if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
+	    goto out_free;
+	}
+      else
+	{
+	  dest_states_word[i] = dest_states[i];
+	  dest_states_nl[i] = dest_states[i];
+	}
+      bitset_merge (acceptable, dests_ch[i]);
+    }
+
+  if (!BE (need_word_trtable, 0))
+    {
+      /* We don't care about whether the following character is a word
+	 character, or we are in a single-byte character set so we can
+	 discern by looking at the character code: allocate a
+	 256-entry transition table.  */
+      trtable = state->trtable =
+	(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
+      if (BE (trtable == NULL, 0))
+	goto out_free;
+
+      /* For all characters ch...:  */
+      for (i = 0; i < BITSET_WORDS; ++i)
+	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
+	     elem;
+	     mask <<= 1, elem >>= 1, ++ch)
+	  if (BE (elem & 1, 0))
+	    {
+	      /* There must be exactly one destination which accepts
+		 character ch.  See group_nodes_into_DFAstates.  */
+	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
+		;
+
+	      /* j-th destination accepts the word character ch.  */
+	      if (dfa->word_char[i] & mask)
+		trtable[ch] = dest_states_word[j];
+	      else
+		trtable[ch] = dest_states[j];
+	    }
+    }
+  else
+    {
+      /* We care about whether the following character is a word
+	 character, and we are in a multi-byte character set: discern
+	 by looking at the character code: build two 256-entry
+	 transition tables, one starting at trtable[0] and one
+	 starting at trtable[SBC_MAX].  */
+      trtable = state->word_trtable =
+	(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
+      if (BE (trtable == NULL, 0))
+	goto out_free;
+
+      /* For all characters ch...:  */
+      for (i = 0; i < BITSET_WORDS; ++i)
+	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
+	     elem;
+	     mask <<= 1, elem >>= 1, ++ch)
+	  if (BE (elem & 1, 0))
+	    {
+	      /* There must be exactly one destination which accepts
+		 character ch.  See group_nodes_into_DFAstates.  */
+	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
+		;
+
+	      /* j-th destination accepts the word character ch.  */
+	      trtable[ch] = dest_states[j];
+	      trtable[ch + SBC_MAX] = dest_states_word[j];
+	    }
+    }
+
+  /* new line */
+  if (bitset_contain (acceptable, NEWLINE_CHAR))
+    {
+      /* The current state accepts newline character.  */
+      for (j = 0; j < ndests; ++j)
+	if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
+	  {
+	    /* j-th destination accepts newline character.  */
+	    trtable[NEWLINE_CHAR] = dest_states_nl[j];
+	    if (need_word_trtable)
+	      trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
+	    /* There must be only one destination which accepts
+	       newline.  See group_nodes_into_DFAstates.  */
+	    break;
+	  }
+    }
+
+  if (dest_states_malloced)
+    free (dest_states);
+
+  re_node_set_free (&follows);
+  for (i = 0; i < ndests; ++i)
+    re_node_set_free (dests_node + i);
+
+  if (dests_node_malloced)
+    free (dests_alloc);
+
+  return 1;
+}
+
+/* Group all nodes belonging to STATE into several destinations.
+   Then for all destinations, set the nodes belonging to the destination
+   to DESTS_NODE[i] and set the characters accepted by the destination
+   to DESTS_CH[i].  Returns the number of destinations, or -1 on error.  */
+
+static int
+internal_function
+group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
+			    re_node_set *dests_node, bitset_t *dests_ch)
+{
+  reg_errcode_t err;
+  int result;
+  int i, j, k;
+  int ndests; /* Number of the destinations from `state'.  */
+  bitset_t accepts; /* Characters a node can accept.  */
+  const re_node_set *cur_nodes = &state->nodes;
+  bitset_empty (accepts);
+  ndests = 0;
+
+  /* For all the nodes belonging to `state',  */
+  for (i = 0; i < cur_nodes->nelem; ++i)
+    {
+      re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
+      re_token_type_t type = node->type;
+      unsigned int constraint = node->constraint;
+
+      /* Enumerate all single byte characters this node can accept.  */
+      if (type == CHARACTER)
+	bitset_set (accepts, node->opr.c);
+      else if (type == SIMPLE_BRACKET)
+	{
+	  bitset_merge (accepts, node->opr.sbcset);
+	}
+      else if (type == OP_PERIOD)
+	{
+#ifdef RE_ENABLE_I18N
+	  if (dfa->mb_cur_max > 1)
+	    bitset_merge (accepts, dfa->sb_char);
+	  else
+#endif
+	    bitset_set_all (accepts);
+	  if (!(dfa->syntax & RE_DOT_NEWLINE))
+	    bitset_clear (accepts, '\n');
+	  if (dfa->syntax & RE_DOT_NOT_NULL)
+	    bitset_clear (accepts, '\0');
+	}
+#ifdef RE_ENABLE_I18N
+      else if (type == OP_UTF8_PERIOD)
+        {
+	  memset (accepts, '\xff', sizeof (bitset_t) / 2); /* First half.  */
+	  if (!(dfa->syntax & RE_DOT_NEWLINE))
+	    bitset_clear (accepts, '\n');
+	  if (dfa->syntax & RE_DOT_NOT_NULL)
+	    bitset_clear (accepts, '\0');
+        }
+#endif
+      else
+	continue; /* Other node types contribute no destination here.  */
+
+      /* Check the `accepts' and sift out the characters which do not
+	 match the context.  */
+      if (constraint)
+	{
+	  if (constraint & NEXT_NEWLINE_CONSTRAINT)
+	    {
+	      bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
+	      bitset_empty (accepts);
+	      if (accepts_newline)
+		bitset_set (accepts, NEWLINE_CHAR);
+	      else
+		continue;
+	    }
+	  if (constraint & NEXT_ENDBUF_CONSTRAINT)
+	    {
+	      bitset_empty (accepts);
+	      continue;
+	    }
+
+	  if (constraint & NEXT_WORD_CONSTRAINT)
+	    {
+	      bitset_word_t any_set = 0;
+	      if (type == CHARACTER && !node->word_char)
+		{
+		  bitset_empty (accepts);
+		  continue;
+		}
+#ifdef RE_ENABLE_I18N
+	      if (dfa->mb_cur_max > 1)
+		for (j = 0; j < BITSET_WORDS; ++j)
+		  any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
+	      else
+#endif
+		for (j = 0; j < BITSET_WORDS; ++j)
+		  any_set |= (accepts[j] &= dfa->word_char[j]);
+	      if (!any_set)
+		continue;
+	    }
+	  if (constraint & NEXT_NOTWORD_CONSTRAINT)
+	    {
+	      bitset_word_t any_set = 0;
+	      if (type == CHARACTER && node->word_char)
+		{
+		  bitset_empty (accepts);
+		  continue;
+		}
+#ifdef RE_ENABLE_I18N
+	      if (dfa->mb_cur_max > 1)
+		for (j = 0; j < BITSET_WORDS; ++j)
+		  any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
+	      else
+#endif
+		for (j = 0; j < BITSET_WORDS; ++j)
+		  any_set |= (accepts[j] &= ~dfa->word_char[j]);
+	      if (!any_set)
+		continue;
+	    }
+	}
+
+      /* Then divide `accepts' into DFA states, or create a new
+	 state.  Above, we make sure that accepts is not empty.  */
+      for (j = 0; j < ndests; ++j)
+	{
+	  bitset_t intersec; /* Intersection sets, see below.  */
+	  bitset_t remains;
+	  /* Flags, see below.  */
+	  bitset_word_t has_intersec, not_subset, not_consumed;
+
+	  /* Optimization, skip if this state doesn't accept the character.  */
+	  if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
+	    continue;
+
+	  /* Enumerate the intersection set of this state and `accepts'.  */
+	  has_intersec = 0;
+	  for (k = 0; k < BITSET_WORDS; ++k)
+	    has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
+	  /* And skip if the intersection set is empty.  */
+	  if (!has_intersec)
+	    continue;
+
+	  /* Then check if this state is a subset of `accepts'.  */
+	  not_subset = not_consumed = 0;
+	  for (k = 0; k < BITSET_WORDS; ++k)
+	    {
+	      not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
+	      not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
+	    }
+
+	  /* If this state isn't a subset of `accepts', create a
+	     new group state, which has the `remains'. */
+	  if (not_subset)
+	    {
+	      bitset_copy (dests_ch[ndests], remains);
+	      bitset_copy (dests_ch[j], intersec);
+	      err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
+	      if (BE (err != REG_NOERROR, 0))
+		goto error_return;
+	      ++ndests;
+	    }
+
+	  /* Put the position in the current group. */
+	  result = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
+	  if (BE (result < 0, 0))
+	    goto error_return;
+
+	  /* If all characters are consumed, go to next node. */
+	  if (!not_consumed)
+	    break;
+	}
+      /* Some characters remain, create a new group. */
+      if (j == ndests)
+	{
+	  bitset_copy (dests_ch[ndests], accepts);
+	  err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
+	  if (BE (err != REG_NOERROR, 0))
+	    goto error_return;
+	  ++ndests;
+	  bitset_empty (accepts); /* Start the next node from an empty set.  */
+	}
+    }
+  return ndests;
+ error_return: /* Free the node sets of the groups built so far.  */
+  for (j = 0; j < ndests; ++j)
+    re_node_set_free (dests_node + j);
+  return -1;
+}
+
+#ifdef RE_ENABLE_I18N
+/* Check how many bytes the node `dfa->nodes[node_idx]' accepts at STR_IDX,
+   the current index into INPUT.  Return that number of bytes; 0 means
+   that nothing is accepted here.
+
+   This function handles the nodes which can accept one whole character or
+   one collating element, like '.' and '[a-z]', as opposed to the other
+   nodes, which can only accept a single byte.  */
+
+static int
+internal_function
+check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
+			 const re_string_t *input, int str_idx)
+{
+  const re_token_t *node = dfa->nodes + node_idx;
+  int char_len, elem_len;
+  int i;
+
+  if (BE (node->type == OP_UTF8_PERIOD, 0))
+    {
+      unsigned char c = re_string_byte_at (input, str_idx), d;
+      if (BE (c < 0xc2, 1)) /* Cannot start a multi-byte sequence.  */
+	return 0;
+
+      if (str_idx + 2 > input->len) /* No second byte available.  */
+	return 0;
+
+      d = re_string_byte_at (input, str_idx + 1);
+      if (c < 0xe0) /* Two bytes: D must lie in 0x80..0xbf.  */
+	return (d < 0x80 || d > 0xbf) ? 0 : 2;
+      else if (c < 0xf0)
+	{
+	  char_len = 3;
+	  if (c == 0xe0 && d < 0xa0)
+	    return 0;
+	}
+      else if (c < 0xf8)
+	{
+	  char_len = 4;
+	  if (c == 0xf0 && d < 0x90)
+	    return 0;
+	}
+      else if (c < 0xfc)
+	{
+	  char_len = 5;
+	  if (c == 0xf8 && d < 0x88)
+	    return 0;
+	}
+      else if (c < 0xfe)
+	{
+	  char_len = 6;
+	  if (c == 0xfc && d < 0x84)
+	    return 0;
+	}
+      else
+	return 0;
+
+      if (str_idx + char_len > input->len) /* Sequence is truncated.  */
+	return 0;
+
+      for (i = 1; i < char_len; ++i) /* All trailing bytes: 0x80..0xbf.  */
+	{
+	  d = re_string_byte_at (input, str_idx + i);
+	  if (d < 0x80 || d > 0xbf)
+	    return 0;
+	}
+      return char_len;
+    }
+
+  char_len = re_string_char_size_at (input, str_idx);
+  if (node->type == OP_PERIOD)
+    {
+      if (char_len <= 1)
+        return 0;
+      /* FIXME: I don't think this if is needed, as both '\n'
+	 and '\0' are char_len == 1.  */
+      /* '.' accepts any one character except the following two cases.  */
+      if ((!(dfa->syntax & RE_DOT_NEWLINE) &&
+	   re_string_byte_at (input, str_idx) == '\n') ||
+	  ((dfa->syntax & RE_DOT_NOT_NULL) &&
+	   re_string_byte_at (input, str_idx) == '\0'))
+	return 0;
+      return char_len;
+    }
+
+  elem_len = re_string_elem_size_at (input, str_idx);
+  if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
+    return 0;
+
+  if (node->type == COMPLEX_BRACKET)
+    {
+      const re_charset_t *cset = node->opr.mbcset;
+# ifdef _LIBC
+      const unsigned char *pin
+	= ((const unsigned char *) re_string_get_buffer (input) + str_idx);
+      int j;
+      uint32_t nrules;
+# endif /* _LIBC */
+      int match_len = 0;
+      wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
+		    ? re_string_wchar_at (input, str_idx) : 0);
+
+      /* match with multibyte character?  */
+      for (i = 0; i < cset->nmbchars; ++i)
+	if (wc == cset->mbchars[i])
+	  {
+	    match_len = char_len;
+	    goto check_node_accept_bytes_match;
+	  }
+      /* match with character_class?  */
+      for (i = 0; i < cset->nchar_classes; ++i)
+	{
+	  wctype_t wt = cset->char_classes[i];
+	  if (__iswctype (wc, wt))
+	    {
+	      match_len = char_len;
+	      goto check_node_accept_bytes_match;
+	    }
+	}
+
+# ifdef _LIBC
+      nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+      if (nrules != 0)
+	{
+	  unsigned int in_collseq = 0;
+	  const int32_t *table, *indirect;
+	  const unsigned char *weights, *extra;
+	  const char *collseqwc;
+	  int32_t idx;
+	  /* This #include defines a local function!  */
+#  include <locale/weight.h>
+
+	  /* match with collating_symbol?  */
+	  if (cset->ncoll_syms)
+	    extra = (const unsigned char *)
+	      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
+	  for (i = 0; i < cset->ncoll_syms; ++i)
+	    {
+	      const unsigned char *coll_sym = extra + cset->coll_syms[i];
+	      /* Compare the length of input collating element and
+		 the length of current collating element.  */
+	      if (*coll_sym != elem_len)
+		continue;
+	      /* Compare each byte.  */
+	      for (j = 0; j < *coll_sym; j++)
+		if (pin[j] != coll_sym[1 + j])
+		  break;
+	      if (j == *coll_sym)
+		{
+		  /* Match if every byte is equal.  */
+		  match_len = j;
+		  goto check_node_accept_bytes_match;
+		}
+	    }
+
+	  if (cset->nranges)
+	    {
+	      if (elem_len <= char_len)
+		{
+		  collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
+		  in_collseq = __collseq_table_lookup (collseqwc, wc);
+		}
+	      else
+		in_collseq = find_collation_sequence_value (pin, elem_len);
+	    }
+	  /* match with range expression?  */
+	  for (i = 0; i < cset->nranges; ++i)
+	    if (cset->range_starts[i] <= in_collseq
+		&& in_collseq <= cset->range_ends[i])
+	      {
+		match_len = elem_len;
+		goto check_node_accept_bytes_match;
+	      }
+
+	  /* match with equivalence_class?  */
+	  if (cset->nequiv_classes)
+	    {
+	      const unsigned char *cp = pin;
+	      table = (const int32_t *)
+		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
+	      weights = (const unsigned char *)
+		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
+	      extra = (const unsigned char *)
+		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
+	      indirect = (const int32_t *)
+		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
+	      idx = findidx (&cp);
+	      if (idx > 0)
+		for (i = 0; i < cset->nequiv_classes; ++i)
+		  {
+		    int32_t equiv_class_idx = cset->equiv_classes[i];
+		    size_t weight_len = weights[idx];
+		    if (weight_len == weights[equiv_class_idx])
+		      {
+			int cnt = 0;
+			while (cnt <= weight_len
+			       && (weights[equiv_class_idx + 1 + cnt]
+				   == weights[idx + 1 + cnt]))
+			  ++cnt;
+			if (cnt > weight_len)
+			  {
+			    match_len = elem_len;
+			    goto check_node_accept_bytes_match;
+			  }
+		      }
+		  }
+	    }
+	}
+      else
+# endif /* _LIBC */
+	{
+	  /* match with range expression?  */
+#if __GNUC__ >= 2
+	  wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
+#else
+	  wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
+	  cmp_buf[2] = wc;
+#endif
+	  for (i = 0; i < cset->nranges; ++i)
+	    {
+	      cmp_buf[0] = cset->range_starts[i]; /* String 1: range start.  */
+	      cmp_buf[4] = cset->range_ends[i]; /* String 3: range end.  */
+	      if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
+		  && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
+		{
+		  match_len = char_len;
+		  goto check_node_accept_bytes_match;
+		}
+	    }
+	}
+    check_node_accept_bytes_match:
+      if (!cset->non_match)
+	return match_len;
+      else
+	{
+	  /* Inverted set: a hit rejects, a miss accepts the longer unit.  */
+	  if (match_len > 0)
+	    return 0;
+	  else
+	    return (elem_len > char_len) ? elem_len : char_len;
+	}
+    }
+  return 0;
+}
+
+# ifdef _LIBC
+static unsigned int
+internal_function
+find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
+{ /* Returns the collation sequence value found for MBS, else UINT_MAX.  */
+  uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+  if (nrules == 0)
+    {
+      if (mbs_len == 1)
+	{
+	  /* No valid character.  Match it as a single byte character.  */
+	  const unsigned char *collseq = (const unsigned char *)
+	    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
+	  return collseq[mbs[0]];
+	}
+      return UINT_MAX;
+    }
+  else
+    {
+      int32_t idx;
+      const unsigned char *extra = (const unsigned char *)
+	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
+      int32_t extrasize = (const unsigned char *)
+	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
+
+      for (idx = 0; idx < extrasize;)
+	{
+	  int mbs_cnt, found = 0;
+	  int32_t elem_mbs_len;
+	  /* Skip the name of the collating element.  */
+	  idx = idx + extra[idx] + 1;
+	  elem_mbs_len = extra[idx++];
+	  if (mbs_len == elem_mbs_len)
+	    {
+	      for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
+		if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
+		  break;
+	      if (mbs_cnt == elem_mbs_len)
+		/* Found the entry.  */
+		found = 1;
+	    }
+	  /* Skip the byte sequence of the collating element.  */
+	  idx += elem_mbs_len;
+	  /* Adjust for the alignment.  */
+	  idx = (idx + 3) & ~3;
+	  /* Skip the collation sequence value.  */
+	  idx += sizeof (uint32_t);
+	  /* Skip the wide char sequence of the collating element.  */
+	  idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
+	  /* If we found the entry, return the sequence value.  */
+	  if (found)
+	    return *(uint32_t *) (extra + idx);
+	  /* Skip the collation sequence value.  */
+	  idx += sizeof (uint32_t);
+	}
+      return UINT_MAX; /* No entry matched MBS.  */
+    }
+}
+# endif /* _LIBC */
+#endif /* RE_ENABLE_I18N */
+
+/* Tell whether NODE accepts the single byte at position IDX of the input
+   held in MCTX.  Returns 1 if it does, 0 otherwise.  */
+
+static int
+internal_function
+check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
+		   int idx)
+{
+  const unsigned char octet = re_string_byte_at (&mctx->input, idx);
+  int accepted = 1;
+  switch (node->type)
+    {
+    case CHARACTER:
+      accepted = (node->opr.c == octet);
+      break;
+
+    case SIMPLE_BRACKET:
+      accepted = bitset_contain (node->opr.sbcset, octet) ? 1 : 0;
+      break;
+
+#ifdef RE_ENABLE_I18N
+    case OP_UTF8_PERIOD:
+      if (octet >= 0x80)
+        return 0;
+      /* FALLTHROUGH */
+#endif
+    case OP_PERIOD:
+      /* The syntax bits decide whether '.' takes newline and NUL.  */
+      if (octet == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
+	accepted = 0;
+      else if (octet == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL))
+	accepted = 0;
+      break;
+
+    default:
+      accepted = 0;
+      break;
+    }
+  if (!accepted)
+    return 0;
+  /* A constrained node also needs a fitting context at IDX.  */
+  if (node->constraint)
+    {
+      unsigned int ctx = re_string_context_at (&mctx->input, idx,
+					       mctx->eflags);
+      if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, ctx))
+	return 0;
+    }
+  return 1;
+}
+
+/* Double MCTX's input buffers (and state_log, if any), then rebuild them.  */
+
+static reg_errcode_t
+internal_function
+extend_buffers (re_match_context_t *mctx)
+{
+  reg_errcode_t ret;
+  re_string_t *pstr = &mctx->input;
+
+  /* Double the lengths of the buffers.  */
+  ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
+  if (BE (ret != REG_NOERROR, 0))
+    return ret;
+
+  if (mctx->state_log != NULL)
+    {
+      /* And double the length of state_log.  */
+      /* XXX We have no indication of the size of this buffer.  If this
+	 allocation fails we have no indication that the state_log array
+	 does not have the right size.  */
+      re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
+					      pstr->bufs_len + 1);
+      if (BE (new_array == NULL, 0))
+	return REG_ESPACE;
+      mctx->state_log = new_array;
+    }
+
+  /* Then reconstruct the buffers.  */
+  if (pstr->icase)
+    {
+#ifdef RE_ENABLE_I18N
+      if (pstr->mb_cur_max > 1)
+	{
+	  ret = build_wcs_upper_buffer (pstr);
+	  if (BE (ret != REG_NOERROR, 0))
+	    return ret;
+	}
+      else
+#endif /* RE_ENABLE_I18N  */
+	build_upper_buffer (pstr);
+    }
+  else
+    {
+#ifdef RE_ENABLE_I18N
+      if (pstr->mb_cur_max > 1)
+	build_wcs_buffer (pstr);
+      else
+#endif /* RE_ENABLE_I18N  */
+	{
+	  if (pstr->trans != NULL) /* Only a translated buffer needs work.  */
+	    re_string_translate_buffer (pstr);
+	}
+    }
+  return REG_NOERROR;
+}
+
+
+/* Functions for matching context.  */
+
+/* Set up MCTX for a match run with flags EFLAGS, reserving room for N
+   back-reference cache entries and N sub-match tops when N is positive.
+   Returns REG_ESPACE if either array cannot be allocated.  */
+
+static reg_errcode_t
+internal_function
+match_ctx_init (re_match_context_t *mctx, int eflags, int n)
+{
+  const int want_arrays = n > 0;
+
+  mctx->eflags = eflags;
+  mctx->match_last = -1;
+  if (want_arrays)
+    {
+      mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
+      mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
+    }
+  /* The caller zeroes MCTX beforehand, so with N <= 0 both pointers are
+     already NULL, as are the nbkref_ents and nsub_tops counters.  */
+  if (BE (want_arrays && (!mctx->bkref_ents || !mctx->sub_tops), 0))
+    return REG_ESPACE;
+  mctx->asub_tops = mctx->abkref_ents = n;
+  mctx->max_mb_elem_len = 1;
+  return REG_NOERROR;
+}
+
+/* Clean the entries which depend on the current input in MCTX.
+   This function must be invoked when the matcher changes the start index
+   of the input, or changes the input string.  It frees every sub_tops entry
+   (with its lasts and paths) and resets nsub_tops and nbkref_ents to 0.  */
+static void
+internal_function
+match_ctx_clean (re_match_context_t *mctx)
+{
+  int st_idx;
+  for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
+    {
+      int sl_idx;
+      re_sub_match_top_t *top = mctx->sub_tops[st_idx];
+      for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
+	{
+	  re_sub_match_last_t *last = top->lasts[sl_idx];
+	  re_free (last->path.array);
+	  re_free (last);
+	}
+      re_free (top->lasts);
+      if (top->path)
+	{
+	  re_free (top->path->array);
+	  re_free (top->path);
+	}
+      free (top);
+    }
+
+  mctx->nsub_tops = 0;	/* The sub_tops and bkref_ents arrays themselves are kept.  */
+  mctx->nbkref_ents = 0;
+}
+
+/* Free all the memory associated with MCTX (but not MCTX itself): the per-input
+   entries via match_ctx_clean, then the sub_tops and bkref_ents arrays.  */
+static void
+internal_function
+match_ctx_free (re_match_context_t *mctx)
+{
+  /* First, free all the memory associated with MCTX->SUB_TOPS.  */
+  match_ctx_clean (mctx);
+  re_free (mctx->sub_tops);
+  re_free (mctx->bkref_ents);
+}
+
+/* Add a new backreference entry to MCTX, growing MCTX->bkref_ents when full.
+   NODE and STR_IDX identify the entry; FROM and TO are stored as its
+   subexp_from and subexp_to.  We assume the caller never passes a duplicate
+   entry, nor a STR_IDX smaller than that of any existing entry.  Returns
+   REG_NOERROR, or REG_ESPACE if growing fails (the old array is kept).  */
+static reg_errcode_t
+internal_function
+match_ctx_add_entry (re_match_context_t *mctx, int node, int str_idx, int from,
+		     int to)
+{
+  if (mctx->nbkref_ents >= mctx->abkref_ents)
+    {
+      struct re_backref_cache_entry* new_entry;
+      new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
+			      mctx->abkref_ents * 2);
+      if (BE (new_entry == NULL, 0))
+	{
+	  /* Do not free the old array here: match_ctx_free releases it.  */
+	  return REG_ESPACE;
+	}
+      mctx->bkref_ents = new_entry;
+      memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
+	      sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
+      mctx->abkref_ents *= 2;
+    }
+  if (mctx->nbkref_ents > 0
+      && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
+    mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;	/* Same str_idx follows.  */
+
+  mctx->bkref_ents[mctx->nbkref_ents].node = node;
+  mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
+  mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
+  mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
+
+  /* This is a cache that saves negative results of check_dst_limits_calc_pos.
+     If bit N is clear, means that this entry won't epsilon-transition to
+     an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression.  If
+     it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
+     such node.
+
+     A backreference does not epsilon-transition unless it is empty, so set
+     to all zeros if FROM != TO.  */
+  mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
+    = (from == to ? ~0 : 0);
+
+  mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
+  if (mctx->max_mb_elem_len < to - from)
+    mctx->max_mb_elem_len = to - from;	/* Track the longest TO - FROM seen.  */
+  return REG_NOERROR;
+}
+
+/* Return the index of the first entry whose str_idx equals STR_IDX, or -1 if
+   none is found.  MCTX->bkref_ents is assumed to be sorted by str_idx.  */
+
+static int
+internal_function
+search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx)
+{
+  int left, right, mid, last;
+  last = right = mctx->nbkref_ents;
+  for (left = 0; left < right;)	/* Binary search over [left, right).  */
+    {
+      mid = (left + right) / 2;
+      if (mctx->bkref_ents[mid].str_idx < str_idx)
+	left = mid + 1;
+      else
+	right = mid;
+    }
+  if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
+    return left;
+  else
+    return -1;
+}
+
+/* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
+   at STR_IDX.  Returns REG_NOERROR, or REG_ESPACE on allocation failure.  */
+
+static reg_errcode_t
+internal_function
+match_ctx_add_subtop (re_match_context_t *mctx, int node, int str_idx)
+{
+#ifdef DEBUG
+  assert (mctx->sub_tops != NULL);
+  assert (mctx->asub_tops > 0);
+#endif
+  if (BE (mctx->nsub_tops == mctx->asub_tops, 0))	/* Array full: double it.  */
+    {
+      int new_asub_tops = mctx->asub_tops * 2;
+      re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
+						   re_sub_match_top_t *,
+						   new_asub_tops);
+      if (BE (new_array == NULL, 0))
+	return REG_ESPACE;
+      mctx->sub_tops = new_array;
+      mctx->asub_tops = new_asub_tops;
+    }
+  mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
+  if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0))
+    return REG_ESPACE;	/* nsub_tops is not advanced on failure.  */
+  mctx->sub_tops[mctx->nsub_tops]->node = node;
+  mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
+  return REG_NOERROR;
+}
+
+/* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
+   at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUBTOP.  Returns the new
+   entry (appended to SUBTOP->lasts), or NULL on allocation failure.  */
+static re_sub_match_last_t *
+internal_function
+match_ctx_add_sublast (re_sub_match_top_t *subtop, int node, int str_idx)
+{
+  re_sub_match_last_t *new_entry;
+  if (BE (subtop->nlasts == subtop->alasts, 0))
+    {
+      int new_alasts = 2 * subtop->alasts + 1;	/* + 1 lets a capacity of 0 grow.  */
+      re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
+						    re_sub_match_last_t *,
+						    new_alasts);
+      if (BE (new_array == NULL, 0))
+	return NULL;
+      subtop->lasts = new_array;
+      subtop->alasts = new_alasts;
+    }
+  new_entry = calloc (1, sizeof (re_sub_match_last_t));
+  if (BE (new_entry != NULL, 1))
+    {
+      subtop->lasts[subtop->nlasts] = new_entry;
+      new_entry->node = node;
+      new_entry->str_idx = str_idx;
+      ++subtop->nlasts;
+    }
+  return new_entry;
+}
+
+static void /* Fill in SCTX from the arguments and initialize its limits set.  */
+internal_function
+sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
+	       re_dfastate_t **limited_sts, int last_node, int last_str_idx)
+{
+  sctx->sifted_states = sifted_sts;
+  sctx->limited_states = limited_sts;
+  sctx->last_node = last_node;
+  sctx->last_str_idx = last_str_idx;
+  re_node_set_init_empty (&sctx->limits);
+}
+
+
+/* Binary backward compatibility.  */
+#if _LIBC
+# include <shlib-compat.h>
+# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3)
+link_warning (re_max_failures, "the 're_max_failures' variable is obsolete and will go away.")
+int re_max_failures = 2000;
+# endif
+#endif
+#endif
diff --git a/gkregex.h b/gkregex.h
new file mode 100644
index 0000000..807c404
--- /dev/null
+++ b/gkregex.h
@@ -0,0 +1,556 @@
+/* Definitions for data structures and routines for the regular
+   expression library.
+   Copyright (C) 1985,1989-93,1995-98,2000,2001,2002,2003,2005,2006
+   Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#ifndef _REGEX_H
+#define _REGEX_H 1
+
+#include <sys/types.h>
+
+/* Allow the use in C++ code.  */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* The following two types have to be signed and unsigned integer type
+   wide enough to hold a value of a pointer.  For most ANSI compilers
+   ptrdiff_t and size_t should be likely OK.  Still size of these two
+   types is 2 for Microsoft C.  Ugh... */
+typedef long int s_reg_t;
+typedef unsigned long int active_reg_t;
+
+/* The following bits are used to determine the regexp syntax we
+   recognize.  The set/not-set meanings are chosen so that Emacs syntax
+   remains the value 0.  The bits are given in alphabetical order, and
+   the definitions shifted by one from the previous bit; thus, when we
+   add or remove a bit, only one other definition need change.  */
+typedef unsigned long int reg_syntax_t;
+
+/* If this bit is not set, then \ inside a bracket expression is literal.
+   If set, then such a \ quotes the following character.  */
+#define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1)
+
+/* If this bit is not set, then + and ? are operators, and \+ and \? are
+     literals.
+   If set, then \+ and \? are operators and + and ? are literals.  */
+#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
+
+/* If this bit is set, then character classes are supported.  They are:
+     [:alpha:], [:upper:], [:lower:],  [:digit:], [:alnum:], [:xdigit:],
+     [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
+   If not set, then character classes are not supported.  */
+#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
+
+/* If this bit is set, then ^ and $ are always anchors (outside bracket
+     expressions, of course).
+   If this bit is not set, then it depends:
+        ^  is an anchor if it is at the beginning of a regular
+           expression or after an open-group or an alternation operator;
+        $  is an anchor if it is at the end of a regular expression, or
+           before a close-group or an alternation operator.
+
+   This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
+   POSIX draft 11.2 says that * etc. in leading positions is undefined.
+   We already implemented a previous draft which made those constructs
+   invalid, though, so we haven't changed the code back.  */
+#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
+
+/* If this bit is set, then special characters are always special
+     regardless of where they are in the pattern.
+   If this bit is not set, then special characters are special only in
+     some contexts; otherwise they are ordinary.  Specifically,
+     * + ? and intervals are only special when not after the beginning,
+     open-group, or alternation operator.  */
+#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
+
+/* If this bit is set, then *, +, ?, and { cannot be first in an re or
+     immediately after an alternation or begin-group operator.  */
+#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
+
+/* If this bit is set, then . matches newline.
+   If not set, then it doesn't.  */
+#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
+
+/* If this bit is set, then . doesn't match NUL.
+   If not set, then it does.  */
+#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
+
+/* If this bit is set, nonmatching lists [^...] do not match newline.
+   If not set, they do.  */
+#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
+
+/* If this bit is set, either \{...\} or {...} defines an
+     interval, depending on RE_NO_BK_BRACES.
+   If not set, \{, \}, {, and } are literals.  */
+#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
+
+/* If this bit is set, +, ? and | aren't recognized as operators.
+   If not set, they are.  */
+#define RE_LIMITED_OPS (RE_INTERVALS << 1)
+
+/* If this bit is set, newline is an alternation operator.
+   If not set, newline is literal.  */
+#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
+
+/* If this bit is set, then `{...}' defines an interval, and \{ and \}
+     are literals.
+  If not set, then `\{...\}' defines an interval.  */
+#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
+
+/* If this bit is set, (...) defines a group, and \( and \) are literals.
+   If not set, \(...\) defines a group, and ( and ) are literals.  */
+#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
+
+/* If this bit is set, then \<digit> matches <digit>.
+   If not set, then \<digit> is a back-reference.  */
+#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
+
+/* If this bit is set, then | is an alternation operator, and \| is literal.
+   If not set, then \| is an alternation operator, and | is literal.  */
+#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
+
+/* If this bit is set, then an ending range point collating higher
+     than the starting range point, as in [z-a], is invalid.
+   If not set, then when ending range point collates higher than the
+     starting range point, the range is ignored.  */
+#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
+
+/* If this bit is set, then an unmatched ) is ordinary.
+   If not set, then an unmatched ) is invalid.  */
+#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
+
+/* If this bit is set, succeed as soon as we match the whole pattern,
+   without further backtracking.  */
+#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)
+
+/* If this bit is set, do not process the GNU regex operators.
+   If not set, then the GNU regex operators are recognized. */
+#define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)
+
+/* If this bit is set, turn on internal regex debugging.
+   If not set, and debugging was on, turn it off.
+   This only works if regex.c is compiled -DDEBUG.
+   We define this bit always, so that all that's needed to turn on
+   debugging is to recompile regex.c; the calling code can always have
+   this bit set, and it won't affect anything in the normal case. */
+#define RE_DEBUG (RE_NO_GNU_OPS << 1)
+
+/* If this bit is set, a syntactically invalid interval is treated as
+   a string of ordinary characters.  For example, the ERE 'a{1' is
+   treated as 'a\{1'.  */
+#define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1)
+
+/* If this bit is set, then ignore case when matching.
+   If not set, then case is significant.  */
+#define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1)
+
+/* This bit is used internally like RE_CONTEXT_INDEP_ANCHORS but only
+   for ^, because it is difficult to scan the regex backwards to find
+   whether ^ should be special.  */
+#define RE_CARET_ANCHORS_HERE (RE_ICASE << 1)
+
+/* If this bit is set, then \{ cannot be first in a BRE or
+   immediately after an alternation or begin-group operator.  */
+#define RE_CONTEXT_INVALID_DUP (RE_CARET_ANCHORS_HERE << 1)
+
+/* If this bit is set, then no_sub will be set to 1 during
+   re_compile_pattern.  */
+#define RE_NO_SUB (RE_CONTEXT_INVALID_DUP << 1)
+
+/* This global variable defines the particular regexp syntax to use (for
+   some interfaces).  When a regexp is compiled, the syntax used is
+   stored in the pattern buffer, so changing this does not affect
+   already-compiled regexps.  */
+extern reg_syntax_t re_syntax_options;
+
+/* Define combinations of the above bits for the standard possibilities.
+   (The [[[ comments delimit what gets put into the Texinfo file, so
+   don't delete them!)  */
+/* [[[begin syntaxes]]] */
+#define RE_SYNTAX_EMACS 0
+
+#define RE_SYNTAX_AWK							\
+  (RE_BACKSLASH_ESCAPE_IN_LISTS   | RE_DOT_NOT_NULL			\
+   | RE_NO_BK_PARENS              | RE_NO_BK_REFS			\
+   | RE_NO_BK_VBAR                | RE_NO_EMPTY_RANGES			\
+   | RE_DOT_NEWLINE		  | RE_CONTEXT_INDEP_ANCHORS		\
+   | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)
+
+#define RE_SYNTAX_GNU_AWK						\
+  ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG)	\
+   & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS		\
+       | RE_CONTEXT_INVALID_OPS ))
+
+#define RE_SYNTAX_POSIX_AWK						\
+  (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS		\
+   | RE_INTERVALS	    | RE_NO_GNU_OPS)
+
+#define RE_SYNTAX_GREP							\
+  (RE_BK_PLUS_QM              | RE_CHAR_CLASSES				\
+   | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS				\
+   | RE_NEWLINE_ALT)
+
+#define RE_SYNTAX_EGREP							\
+  (RE_CHAR_CLASSES        | RE_CONTEXT_INDEP_ANCHORS			\
+   | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE			\
+   | RE_NEWLINE_ALT       | RE_NO_BK_PARENS				\
+   | RE_NO_BK_VBAR)
+
+#define RE_SYNTAX_POSIX_EGREP						\
+  (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES			\
+   | RE_INVALID_INTERVAL_ORD)
+
+/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff.  */
+#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
+
+#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
+
+/* Syntax bits common to both basic and extended POSIX regex syntax.  */
+#define _RE_SYNTAX_POSIX_COMMON						\
+  (RE_CHAR_CLASSES | RE_DOT_NEWLINE      | RE_DOT_NOT_NULL		\
+   | RE_INTERVALS  | RE_NO_EMPTY_RANGES)
+
+#define RE_SYNTAX_POSIX_BASIC						\
+  (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM | RE_CONTEXT_INVALID_DUP)
+
+/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
+   RE_LIMITED_OPS, i.e., \? \+ \| are not recognized.  Actually, this
+   isn't minimal, since other operators, such as \`, aren't disabled.  */
+#define RE_SYNTAX_POSIX_MINIMAL_BASIC					\
+  (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
+
+#define RE_SYNTAX_POSIX_EXTENDED					\
+  (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS			\
+   | RE_CONTEXT_INDEP_OPS   | RE_NO_BK_BRACES				\
+   | RE_NO_BK_PARENS        | RE_NO_BK_VBAR				\
+   | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD)
+
+/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is
+   removed and RE_NO_BK_REFS is added.  */
+#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED				\
+  (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS			\
+   | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES				\
+   | RE_NO_BK_PARENS        | RE_NO_BK_REFS				\
+   | RE_NO_BK_VBAR	    | RE_UNMATCHED_RIGHT_PAREN_ORD)
+/* [[[end syntaxes]]] */
+
+/* Maximum number of duplicates an interval can allow.  Some systems
+   (erroneously) define this in other header files, but we want our
+   value, so remove any previous define.  */
+#ifdef RE_DUP_MAX
+# undef RE_DUP_MAX
+#endif
+/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows.  */
+#define RE_DUP_MAX (0x7fff)
+
+
+/* POSIX `cflags' bits (i.e., information for `regcomp').  */
+
+/* If this bit is set, then use extended regular expression syntax.
+   If not set, then use basic regular expression syntax.  */
+#define REG_EXTENDED 1
+
+/* If this bit is set, then ignore case when matching.
+   If not set, then case is significant.  */
+#define REG_ICASE (REG_EXTENDED << 1)
+
+/* If this bit is set, then anchors do not match at newline
+     characters in the string.
+   If not set, then anchors do match at newlines.  */
+#define REG_NEWLINE (REG_ICASE << 1)
+
+/* If this bit is set, then report only success or fail in regexec.
+   If not set, then returns differ between not matching and errors.  */
+#define REG_NOSUB (REG_NEWLINE << 1)
+
+
+/* POSIX `eflags' bits (i.e., information for regexec).  */
+
+/* If this bit is set, then the beginning-of-line operator doesn't match
+     the beginning of the string (presumably because it's not the
+     beginning of a line).
+   If not set, then the beginning-of-line operator does match the
+     beginning of the string.  */
+#define REG_NOTBOL 1
+
+/* Like REG_NOTBOL, except for the end-of-line.  */
+#define REG_NOTEOL (1 << 1)
+
+/* Use PMATCH[0] to delimit the start and end of the search in the
+   buffer.  */
+#define REG_STARTEND (1 << 2)
+
+
+/* If any error codes are removed, changed, or added, update the
+   `re_error_msg' table in regex.c.  */
+typedef enum
+{
+#ifdef _XOPEN_SOURCE
+  REG_ENOSYS = -1,	/* This will never happen for this implementation.  */
+#endif
+
+  REG_NOERROR = 0,	/* Success.  */
+  REG_NOMATCH,		/* Didn't find a match (for regexec).  */
+
+  /* POSIX regcomp return error codes.  (In the order listed in the
+     standard.)  */
+  REG_BADPAT,		/* Invalid pattern.  */
+  REG_ECOLLATE,		/* Invalid collating element.  */
+  REG_ECTYPE,		/* Invalid character class name.  */
+  REG_EESCAPE,		/* Trailing backslash.  */
+  REG_ESUBREG,		/* Invalid back reference.  */
+  REG_EBRACK,		/* Unmatched left bracket.  */
+  REG_EPAREN,		/* Parenthesis imbalance.  */
+  REG_EBRACE,		/* Unmatched \{.  */
+  REG_BADBR,		/* Invalid contents of \{\}.  */
+  REG_ERANGE,		/* Invalid range end.  */
+  REG_ESPACE,		/* Ran out of memory.  */
+  REG_BADRPT,		/* No preceding re for repetition op.  */
+
+  /* Error codes we've added.  */
+  REG_EEND,		/* Premature end.  */
+  REG_ESIZE,		/* Compiled pattern bigger than 2^16 bytes.  */
+  REG_ERPAREN		/* Unmatched ) or \); not returned from regcomp.  */
+} reg_errcode_t;
+
+/* This data structure represents a compiled pattern.  Before calling
+   the pattern compiler, the fields `buffer', `allocated', `fastmap',
+   `translate', and `no_sub' can be set.  After the pattern has been
+   compiled, the `re_nsub' field is available.  All other fields are
+   private to the regex routines.  */
+
+#ifndef RE_TRANSLATE_TYPE
+# define RE_TRANSLATE_TYPE unsigned char *
+#endif
+
+struct re_pattern_buffer
+{
+  /* Space that holds the compiled pattern.  It is declared as
+     `unsigned char *' because its elements are sometimes used as
+     array indexes.  */
+  unsigned char *buffer;
+
+  /* Number of bytes to which `buffer' points.  */
+  unsigned long int allocated;
+
+  /* Number of bytes actually used in `buffer'.  */
+  unsigned long int used;
+
+  /* Syntax setting with which the pattern was compiled.  */
+  reg_syntax_t syntax;
+
+  /* Pointer to a fastmap, if any, otherwise zero.  re_search uses the
+     fastmap, if there is one, to skip over impossible starting points
+     for matches.  */
+  char *fastmap;
+
+  /* Either a translate table to apply to all characters before
+     comparing them, or zero for no translation.  The translation is
+     applied to a pattern when it is compiled and to a string when it
+     is matched.  */
+  RE_TRANSLATE_TYPE translate;
+
+  /* Number of subexpressions found by the compiler.  */
+  size_t re_nsub;
+
+  /* Zero if this pattern cannot match the empty string, one else.
+     Well, in truth it's used only in `re_search_2', to see whether or
+     not we should use the fastmap, so we don't set this absolutely
+     perfectly; see `re_compile_fastmap' (the `duplicate' case).  */
+  unsigned can_be_null : 1;
+
+  /* If REGS_UNALLOCATED, allocate space in the `regs' structure
+     for `max (RE_NREGS, re_nsub + 1)' groups.
+     If REGS_REALLOCATE, reallocate space if necessary.
+     If REGS_FIXED, use what's there.  */
+#define REGS_UNALLOCATED 0
+#define REGS_REALLOCATE 1
+#define REGS_FIXED 2
+  unsigned regs_allocated : 2;
+
+  /* Set to zero when `regex_compile' compiles a pattern; set to one
+     by `re_compile_fastmap' if it updates the fastmap.  */
+  unsigned fastmap_accurate : 1;
+
+  /* If set, `re_match_2' does not return information about
+     subexpressions.  */
+  unsigned no_sub : 1;
+
+  /* If set, a beginning-of-line anchor doesn't match at the beginning
+     of the string.  */
+  unsigned not_bol : 1;
+
+  /* Similarly for an end-of-line anchor.  */
+  unsigned not_eol : 1;
+
+  /* If true, an anchor at a newline matches.  */
+  unsigned newline_anchor : 1;
+};
+
+typedef struct re_pattern_buffer regex_t;
+
+/* Type for byte offsets within the string.  POSIX mandates this.  */
+typedef int regoff_t;
+
+
+/* This is the structure we store register match data in.  See
+   regex.texinfo for a full description of what registers match.  */
+struct re_registers
+{
+  unsigned num_regs;
+  regoff_t *start;
+  regoff_t *end;
+};
+
+
+/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
+   `re_match_2' returns information about at least this many registers
+   the first time a `regs' structure is passed.  */
+#ifndef RE_NREGS
+# define RE_NREGS 30
+#endif
+
+
+/* POSIX specification for registers.  Aside from using different names
+   than `re_registers', POSIX uses an array of structures, instead of a
+   structure of arrays.  */
+typedef struct
+{
+  regoff_t rm_so;  /* Byte offset from string's start to substring's start.  */
+  regoff_t rm_eo;  /* Byte offset from string's start to substring's end.  */
+} regmatch_t;
+
+/* Declarations for routines.  */
+
+/* Sets the current default syntax to SYNTAX, and return the old syntax.
+   You can also simply assign to the `re_syntax_options' variable.  */
+extern reg_syntax_t re_set_syntax (reg_syntax_t __syntax);
+
+/* Compile the regular expression PATTERN, with length LENGTH
+   and syntax given by the global `re_syntax_options', into the buffer
+   BUFFER.  Return NULL if successful, and an error string if not.  */
+extern const char *re_compile_pattern (const char *__pattern, size_t __length,
+				       struct re_pattern_buffer *__buffer);
+
+
+/* Compile a fastmap for the compiled pattern in BUFFER; used to
+   accelerate searches.  Return 0 if successful and -2 if there was an
+   internal error.  */
+extern int re_compile_fastmap (struct re_pattern_buffer *__buffer);
+
+
+/* Search in the string STRING (with length LENGTH) for the pattern
+   compiled into BUFFER.  Start searching at position START, for RANGE
+   characters.  Return the starting position of the match, -1 for no
+   match, or -2 for an internal error.  Also return register
+   information in REGS (if REGS is nonzero and BUFFER->no_sub is zero).  */
+extern int re_search (struct re_pattern_buffer *__buffer, const char *__string,
+		      int __length, int __start, int __range,
+		      struct re_registers *__regs);
+
+
+/* Like `re_search', but search in the concatenation of STRING1 and
+   STRING2.  Also, stop searching at index START + STOP.  */
+extern int re_search_2 (struct re_pattern_buffer *__buffer,
+			const char *__string1, int __length1,
+			const char *__string2, int __length2, int __start,
+			int __range, struct re_registers *__regs, int __stop);
+
+
+/* Like `re_search', but return how many characters in STRING the regexp
+   in BUFFER matched, starting at position START.  */
+extern int re_match (struct re_pattern_buffer *__buffer, const char *__string,
+		     int __length, int __start, struct re_registers *__regs);
+
+
+/* Relates to `re_match' as `re_search_2' relates to `re_search'.  */
+extern int re_match_2 (struct re_pattern_buffer *__buffer,
+		       const char *__string1, int __length1,
+		       const char *__string2, int __length2, int __start,
+		       struct re_registers *__regs, int __stop);
+
+
+/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
+   ENDS.  Subsequent matches using BUFFER and REGS will use this memory
+   for recording register information.  STARTS and ENDS must be
+   allocated with malloc, and must each be at least `NUM_REGS * sizeof
+   (regoff_t)' bytes long.
+
+   If NUM_REGS == 0, then subsequent matches should allocate their own
+   register data.
+
+   Unless this function is called, the first search or match using
+   PATTERN_BUFFER will allocate its own register data, without
+   freeing the old data.  */
+extern void re_set_registers (struct re_pattern_buffer *__buffer,
+			      struct re_registers *__regs,
+			      unsigned int __num_regs,
+			      regoff_t *__starts, regoff_t *__ends);
+
+#if defined _REGEX_RE_COMP || defined _LIBC
+# ifndef _CRAY
+/* 4.2 bsd compatibility.  */
+extern char *re_comp (const char *);
+extern int re_exec (const char *);
+# endif
+#endif
+
+/* GCC 2.95 and later have "__restrict"; C99 compilers have
+   "restrict", and "configure" may have defined "restrict".  */
+#ifndef __restrict
+# if ! (2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__))
+#  if defined restrict || 199901L <= __STDC_VERSION__
+#   define __restrict restrict
+#  else
+#   define __restrict
+#  endif
+# endif
+#endif
+/* gcc 3.1 and up support the [restrict] syntax.  */
+#ifndef __restrict_arr
+# if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) \
+     && !defined __GNUG__
+#  define __restrict_arr __restrict
+# else
+#  define __restrict_arr
+# endif
+#endif
+
+/* POSIX compatibility.  */
+extern int regcomp (regex_t *__restrict __preg,
+		    const char *__restrict __pattern,
+		    int __cflags);
+
+extern int regexec (const regex_t *__restrict __preg,
+		    const char *__restrict __string, size_t __nmatch,
+		    regmatch_t __pmatch[__restrict_arr],
+		    int __eflags);
+
+extern size_t regerror (int __errcode, const regex_t *__restrict __preg,
+			char *__restrict __errbuf, size_t __errbuf_size);
+
+extern void regfree (regex_t *__preg);
+
+
+#ifdef __cplusplus
+}
+#endif	/* C++ */
+
+#endif /* regex.h */
diff --git a/graph.c b/graph.c
new file mode 100644
index 0000000..fa40f07
--- /dev/null
+++ b/graph.c
@@ -0,0 +1,1940 @@
+/*!
+ * \file 
+ *
+ * \brief Various routines with dealing with sparse graphs 
+ *
+ * \author George Karypis
+ * \version\verbatim $Id: graph.c 22415 2019-09-05 16:55:00Z karypis $ \endverbatim
+ */
+
+#include <GKlib.h>
+
+#define OMPMINOPS       50000
+
+/*************************************************************************/
+/*! Creates a new, empty graph object on the heap.
+    \returns the newly allocated graph, as initialized by gk_graph_Init().
+*/
+/**************************************************************************/
+gk_graph_t *gk_graph_Create()
+{
+  gk_graph_t *newgraph = (gk_graph_t *)gk_malloc(sizeof(gk_graph_t),
+                                                 "gk_graph_Create: graph");
+
+  /* put the freshly allocated object into its empty state */
+  gk_graph_Init(newgraph);
+
+  return newgraph;
+}
+
+
+/*************************************************************************/
+/*! Puts a graph into its empty state: zero-filled, with nvtxs set to -1.
+    \param graph is the graph to be initialized; nothing it holds is freed.
+*/
+/*************************************************************************/
+void gk_graph_Init(gk_graph_t *graph)
+{
+  memset(graph, 0, sizeof(*graph));  /* clears every field of the structure */
+  graph->nvtxs = -1;                 /* the only non-zero initial value */
+}
+
+
+/*************************************************************************/
+/*! Frees all the memory allocated for a graph, including the graph itself.
+    \param graph is the address of the graph pointer; NULL or *graph==NULL is a no-op.
+*/
+/*************************************************************************/
+void gk_graph_Free(gk_graph_t **graph)
+{
+  if (graph == NULL || *graph == NULL) /* nothing to release */
+    return;
+  gk_graph_FreeContents(*graph);       /* first the arrays, then the object */
+  gk_free((void **)graph, LTERM);
+}
+
+
+/*************************************************************************/
+/*! Releases every array owned by the graph (structure, weights, sizes and
+    labels) through a single gk_free() call, which also sets them to NULL.
+    The gk_graph_t object itself is not freed.
+    \param graph is the graph whose contents will be freed.
+*/
+/*************************************************************************/
+void gk_graph_FreeContents(gk_graph_t *graph)
+{
+  /* LTERM ends the list of addresses handed to gk_free() */
+  gk_free((void **)&graph->xadj, &graph->adjncy,
+          &graph->iadjwgt, &graph->fadjwgt, &graph->ivwgts, &graph->fvwgts,
+          &graph->ivsizes, &graph->fvsizes, &graph->vlabels,
+          LTERM);
+}
+
+
+/**************************************************************************/
+/*! Reads a sparse graph from the supplied file; errors end in gk_errexit().
+    \param filename is the file that stores the data.
+    \param format is the graph format. The supported values are:
+           GK_GRAPH_FMT_METIS, GK_GRAPH_FMT_IJV, GK_GRAPH_FMT_HIJV.
+    \param hasvals is 1 if an IJV/HIJV file has values (METIS: see header)
+    \param numbering is 1 if IJV/HIJV numbering starts from one (METIS: always)
+    \param isfewgts is 1 if the edge-weights should be read as floats
+    \param isfvwgts is 1 if the vertex-weights should be read as floats
+    \param isfvsizes is 1 if the vertex-sizes should be read as floats
+    \returns the graph that was read.
+*/
+/**************************************************************************/
+gk_graph_t *gk_graph_Read(char *filename, int format, int hasvals, 
+                int numbering, int isfewgts, int isfvwgts, int isfvsizes)
+{
+  ssize_t i, k, l;
+  size_t nvtxs, nedges, fmt, ncon, lnlen;
+  ssize_t *xadj;
+  int32_t ival, *iinds=NULL, *jinds=NULL, *ivals=NULL, *adjncy, *iadjwgt;
+  float fval, *fvals=NULL, *fadjwgt;
+  int nfields, readsizes=0, readwgts=0, readvals=0;
+  char *line=NULL, *head, *tail, fmtstr[256];
+  FILE *fpin=NULL;
+  gk_graph_t *graph=NULL;
+
+
+  if (!gk_fexists(filename))
+    gk_errexit(SIGERR, "File %s does not exist!\n", filename);
+
+  switch (format) {
+    case GK_GRAPH_FMT_METIS:
+      fpin = gk_fopen(filename, "r", "gk_graph_Read: fpin");
+      do {
+        if (gk_getline(&line, &lnlen, fpin) <= 0)
+          gk_errexit(SIGERR, "Premature end of input file: file:%s\n", filename);
+      } while (line[0] == '%');
+      /* nfields is an int because sscanf() returns a negative EOF on a blank line */
+      fmt = ncon = 0;
+      nfields = sscanf(line, "%zu %zu %zu %zu", &nvtxs, &nedges, &fmt, &ncon);
+      if (nfields < 2)
+        gk_errexit(SIGERR, "Header line must contain at least 2 integers (#vtxs and #edges).\n");
+      /* the file must list twice the header's edge count as adjacency entries */
+      nedges *= 2;
+
+      if (fmt > 111)
+        gk_errexit(SIGERR, "Cannot read this type of file format [fmt=%zu]!\n", fmt);
+
+      sprintf(fmtstr, "%03zu", fmt%1000);
+      readsizes = (fmtstr[0] == '1');
+      readwgts  = (fmtstr[1] == '1');
+      readvals  = (fmtstr[2] == '1');
+      numbering = 1;
+      ncon      = (ncon == 0 ? 1 : ncon);
+
+      graph = gk_graph_Create();
+
+      graph->nvtxs = nvtxs;
+
+      graph->xadj   = gk_zmalloc(nvtxs+1, "gk_graph_Read: xadj");
+      graph->adjncy = gk_i32malloc(nedges, "gk_graph_Read: adjncy");
+      if (readvals) {
+        if (isfewgts)
+          graph->fadjwgt = gk_fmalloc(nedges, "gk_graph_Read: fadjwgt");
+        else
+          graph->iadjwgt = gk_i32malloc(nedges, "gk_graph_Read: iadjwgt");
+      }
+
+      if (readsizes) {
+        if (isfvsizes)
+          graph->fvsizes = gk_fmalloc(nvtxs, "gk_graph_Read: fvsizes");
+        else
+          graph->ivsizes = gk_i32malloc(nvtxs, "gk_graph_Read: ivsizes");
+      }
+
+      if (readwgts) {
+        if (isfvwgts)
+          graph->fvwgts = gk_fmalloc(nvtxs*ncon, "gk_graph_Read: fvwgts");
+        else
+          graph->ivwgts = gk_i32malloc(nvtxs*ncon, "gk_graph_Read: ivwgts");
+      }
+
+
+      /*----------------------------------------------------------------------
+       * Read the sparse graph file
+       *---------------------------------------------------------------------*/
+      numbering = (numbering ? - 1 : 0);
+      for (graph->xadj[0]=0, k=0, i=0; i<nvtxs; i++) {
+        do {
+          if (gk_getline(&line, &lnlen, fpin) == -1)
+            gk_errexit(SIGERR, "Premature end of input file: file while reading row %zd\n", i);
+        } while (line[0] == '%');
+
+        head = line;
+        tail = NULL;
+
+        /* Read vertex sizes */
+        if (readsizes) {
+          if (isfvsizes) {
+#ifdef __MSC__
+            graph->fvsizes[i] = (float)strtod(head, &tail);
+#else
+            graph->fvsizes[i] = strtof(head, &tail);
+#endif
+            if (tail == head)
+              gk_errexit(SIGERR, "The line for vertex %zd does not have size information\n", i+1);
+            if (graph->fvsizes[i] < 0)
+              gk_errexit(SIGERR, "The size for vertex %zd must be >= 0\n", i+1);
+          }
+          else {
+            graph->ivsizes[i] = strtol(head, &tail, 0);
+            if (tail == head)
+              gk_errexit(SIGERR, "The line for vertex %zd does not have size information\n", i+1);
+            if (graph->ivsizes[i] < 0)
+              gk_errexit(SIGERR, "The size for vertex %zd must be >= 0\n", i+1);
+          }
+          head = tail;
+        }
+
+        /* Read vertex weights */
+        if (readwgts) {
+          for (l=0; l<ncon; l++) {
+            if (isfvwgts) {
+#ifdef __MSC__
+              graph->fvwgts[i*ncon+l] = (float)strtod(head, &tail);
+#else
+              graph->fvwgts[i*ncon+l] = strtof(head, &tail);
+#endif
+              if (tail == head)
+                gk_errexit(SIGERR, "The line for vertex %zd does not have enough weights "
+                        "for the %zu constraints.\n", i+1, ncon);
+              if (graph->fvwgts[i*ncon+l] < 0)
+                gk_errexit(SIGERR, "The weight vertex %zd and constraint %zd must be >= 0\n", i+1, l);
+            }
+            else {
+              graph->ivwgts[i*ncon+l] = strtol(head, &tail, 0);
+              if (tail == head)
+                gk_errexit(SIGERR, "The line for vertex %zd does not have enough weights "
+                        "for the %zu constraints.\n", i+1, ncon);
+              if (graph->ivwgts[i*ncon+l] < 0)
+                gk_errexit(SIGERR, "The weight vertex %zd and constraint %zd must be >= 0\n", i+1, l);
+            }
+            head = tail;
+          }
+        }
+
+        /* Read the rest of the row */
+        while (1) {
+          ival = (int)strtol(head, &tail, 0);
+          if (tail == head)
+            break;
+          head = tail;
+          if ((size_t)k >= nedges) /* adjncy[] and the weights hold exactly nedges entries */
+            gk_errexit(SIGERR, "Error: More than the expected %zu adjacency entries at row %zd.\n", nedges, i);
+          if ((graph->adjncy[k] = ival + numbering) < 0)
+            gk_errexit(SIGERR, "Error: Invalid column number %d at row %zd.\n", ival, i);
+
+          if (readvals) {
+            if (isfewgts) {
+#ifdef __MSC__
+              fval = (float)strtod(head, &tail);
+#else
+              fval = strtof(head, &tail);
+#endif
+              if (tail == head)
+                gk_errexit(SIGERR, "Value could not be found for edge! Vertex:%zd, NNZ:%zd\n", i, k);
+
+              graph->fadjwgt[k] = fval;
+            }
+            else {
+              ival = strtol(head, &tail, 0);
+              if (tail == head)
+                gk_errexit(SIGERR, "Value could not be found for edge! Vertex:%zd, NNZ:%zd\n", i, k);
+
+              graph->iadjwgt[k] = ival;
+            }
+            head = tail;
+          }
+          k++;
+        }
+        graph->xadj[i+1] = k;
+      }
+
+      if ((size_t)k != nedges)
+        gk_errexit(SIGERR, "gk_graph_Read: Something wrong with the number of edges in "
+                           "the input file. nedges=%zu, Actualnedges=%zd.\n", nedges, k);
+
+      gk_fclose(fpin);
+
+      gk_free((void **)&line, LTERM);
+
+      break;
+
+    case GK_GRAPH_FMT_IJV:
+    case GK_GRAPH_FMT_HIJV:
+      gk_getfilestats(filename, &nvtxs, &nedges, NULL, NULL);
+
+      if (format == GK_GRAPH_FMT_HIJV) { /* remove the #rows/#cols values and row */
+        nedges -= 2; 
+        nvtxs  -= 1;
+      }
+
+      if (hasvals == 1 && 3*nvtxs != nedges)
+        gk_errexit(SIGERR, "Error: The number of numbers (%zu %d) in the input file is not a multiple of 3.\n", nedges, hasvals);
+      if (hasvals == 0 && 2*nvtxs != nedges)
+        gk_errexit(SIGERR, "Error: The number of numbers (%zu %d) in the input file is not a multiple of 2.\n", nedges, hasvals);
+
+      nedges = nvtxs;
+      numbering = (numbering ? -1 : 0);
+
+      /* read the data into three arrays */
+      iinds = gk_i32malloc(nedges, "iinds");
+      jinds = gk_i32malloc(nedges, "jinds");
+      if (hasvals) {
+        if (isfewgts)
+          fvals = gk_fmalloc(nedges, "fvals");
+        else
+          ivals = gk_i32malloc(nedges, "ivals");
+      }
+
+      fpin = gk_fopen(filename, "r", "gk_graph_Read: fpin");
+
+      if (format == GK_GRAPH_FMT_HIJV) { /* read and ignore the #rows/#cols values */
+        if (fscanf(fpin, "%zd %zd", &i, &i) != 2)
+          gk_errexit(SIGERR, "Error: Failed to read the header line.\n");
+      }
+
+      for (nvtxs=0, i=0; i<nedges; i++) {
+        if (hasvals) {
+          if (isfewgts) {
+            if (fscanf(fpin, "%"PRId32" %"PRId32" %f", &iinds[i], &jinds[i], &fvals[i]) != 3)
+              gk_errexit(SIGERR, "Error: Failed to read (i, j, val) for nedge: %zd.\n", i);
+          }
+          else {
+            if (fscanf(fpin, "%"PRId32" %"PRId32" %"PRId32, &iinds[i], &jinds[i], &ivals[i]) != 3)
+              gk_errexit(SIGERR, "Error: Failed to read (i, j, val) for nedge: %zd.\n", i);
+          }
+        }
+        else {
+          if (fscanf(fpin, "%"PRId32" %"PRId32, &iinds[i], &jinds[i]) != 2)
+            gk_errexit(SIGERR, "Error: Failed to read (i, j) value for nedge: %zd.\n", i);
+        }
+        iinds[i] += numbering;
+        jinds[i] += numbering;
+        if (iinds[i] < 0 || jinds[i] < 0) /* would index xadj[] out of bounds below */
+          gk_errexit(SIGERR, "Error: Invalid (i, j) indices for nedge: %zd.\n", i);
+        /* track the largest vertex index seen so far */
+        nvtxs = (nvtxs < (size_t)iinds[i] ? (size_t)iinds[i] : nvtxs);
+        nvtxs = (nvtxs < (size_t)jinds[i] ? (size_t)jinds[i] : nvtxs);
+      }
+      gk_fclose(fpin);
+
+      /* convert (i, j, v) into a graph format */
+      graph = gk_graph_Create();
+      graph->nvtxs  = ++nvtxs;
+      xadj   = graph->xadj   = gk_zsmalloc(nvtxs+1, 0, "xadj");
+      adjncy = graph->adjncy = gk_i32malloc(nedges, "adjncy");
+      if (hasvals) {
+        if (isfewgts)
+          fadjwgt = graph->fadjwgt = gk_fmalloc(nedges, "fadjwgt");
+        else
+          iadjwgt = graph->iadjwgt = gk_i32malloc(nedges, "iadjwgt");
+      }
+
+      for (i=0; i<nedges; i++)
+        xadj[iinds[i]]++;
+      MAKECSR(i, nvtxs, xadj);
+
+      for (i=0; i<nedges; i++) {
+        adjncy[xadj[iinds[i]]] = jinds[i];
+        if (hasvals) {
+          if (isfewgts)
+            fadjwgt[xadj[iinds[i]]] = fvals[i];
+          else
+            iadjwgt[xadj[iinds[i]]] = ivals[i];
+        }
+        xadj[iinds[i]]++;
+      }
+      SHIFTCSR(i, nvtxs, xadj);
+
+      gk_free((void **)&iinds, &jinds, &fvals, &ivals, LTERM);
+      break;
+
+    default:
+      gk_errexit(SIGERR, "Unrecognized format: %d\n", format);
+  }
+
+  return graph;
+}
+
+
+/**************************************************************************/
+/*! Writes a graph into a file, or to stdout when no file name is given.
+    \param graph is the graph to be written,
+    \param filename is the name of the output file; NULL selects stdout.
+    \param format specifies the format of the output file and must be
+           GK_GRAPH_FMT_METIS or GK_GRAPH_FMT_IJV.
+    \param numbering is the offset (0 or 1) added to the vertex numbers in
+           the IJV format. The METIS format ignores it and always numbers
+           the vertices from 1.
+*/
+/**************************************************************************/
+void gk_graph_Write(gk_graph_t *graph, char *filename, int format, int numbering)
+{
+  int32_t v;
+  ssize_t e;
+  int hasvwgts, hasvsizes, hasewgts;
+  FILE *fpout;
+
+  /* a NULL filename sends the output to stdout */
+  fpout = (filename ? gk_fopen(filename, "w", "gk_graph_Write: fpout") : stdout);
+
+  /* an attribute counts as present when either its integer or its float
+     array is set; the integer array wins if both are */
+  hasvsizes = (graph->ivsizes || graph->fvsizes);
+  hasvwgts  = (graph->ivwgts || graph->fvwgts);
+  hasewgts  = (graph->iadjwgt || graph->fadjwgt);
+
+  switch (format) {
+    case GK_GRAPH_FMT_METIS:
+      /* header line: #vertices, half the number of stored adjacency
+         entries and, if any attribute is present, the three format flags */
+      fprintf(fpout, "%d %zd", graph->nvtxs, graph->xadj[graph->nvtxs]/2);
+      if (hasvwgts || hasvsizes || hasewgts)
+        fprintf(fpout, " %d%d%d", hasvsizes, hasvwgts, hasewgts);
+      fprintf(fpout, "\n");
+
+      /* one line per vertex */
+      for (v=0; v<graph->nvtxs; v++) {
+        /* vertex size */
+        if (graph->ivsizes)
+          fprintf(fpout, " %d", graph->ivsizes[v]);
+        else if (graph->fvsizes)
+          fprintf(fpout, " %f", graph->fvsizes[v]);
+
+        /* vertex weight (a single value per vertex) */
+        if (graph->ivwgts)
+          fprintf(fpout, " %d", graph->ivwgts[v]);
+        else if (graph->fvwgts)
+          fprintf(fpout, " %f", graph->fvwgts[v]);
+
+        /* neighbors, shifted to 1-based numbering, each followed by its
+           edge weight when the graph has them */
+        for (e=graph->xadj[v]; e<graph->xadj[v+1]; e++) {
+          fprintf(fpout, " %d", graph->adjncy[e]+1);
+          if (graph->iadjwgt)
+            fprintf(fpout, " %d", graph->iadjwgt[e]);
+          else if (graph->fadjwgt)
+            fprintf(fpout, " %f", graph->fadjwgt[e]);
+        }
+        fprintf(fpout, "\n");
+      }
+      break;
+
+    case GK_GRAPH_FMT_IJV:
+      /* one "i j value" line per stored adjacency entry; entries of a
+         graph without edge weights get the value 1 */
+      for (v=0; v<graph->nvtxs; v++) {
+        for (e=graph->xadj[v]; e<graph->xadj[v+1]; e++) {
+          fprintf(fpout, "%d %d ", v+numbering, graph->adjncy[e]+numbering);
+          if (graph->iadjwgt)
+            fprintf(fpout, " %d\n", graph->iadjwgt[e]);
+          else if (graph->fadjwgt)
+            fprintf(fpout, " %f\n", graph->fadjwgt[e]);
+          else
+            fprintf(fpout, " 1\n");
+        }
+      }
+      break;
+
+    default:
+      gk_errexit(SIGERR, "Unknown file format. %d\n", format);
+  }
+
+  /* only close what was opened here; stdout stays open */
+  if (filename)
+    gk_fclose(fpout);
+}
+
+
+/*************************************************************************/
+/*! Returns a deep copy of a graph. Only the arrays that are present
+    (non-NULL) in the original are allocated and copied.
+    \param graph is the graph to be duplicated.
+    \returns the newly created copy of the graph.
+*/
+/**************************************************************************/
+gk_graph_t *gk_graph_Dup(gk_graph_t *graph)
+{
+  int32_t nvtxs = graph->nvtxs;
+  gk_graph_t *copy = gk_graph_Create();
+
+  copy->nvtxs = nvtxs;
+  /* the row-pointer array has one entry more than there are vertices */
+  if (graph->xadj)
+    copy->xadj = gk_zcopy(nvtxs+1, graph->xadj,
+                     gk_zmalloc(nvtxs+1, "gk_graph_Dup: xadj"));
+  /* per-vertex attributes: nvtxs entries each */
+  if (graph->ivwgts)
+    copy->ivwgts = gk_i32copy(nvtxs, graph->ivwgts,
+                       gk_i32malloc(nvtxs, "gk_graph_Dup: ivwgts"));
+  if (graph->ivsizes)
+    copy->ivsizes = gk_i32copy(nvtxs, graph->ivsizes,
+                        gk_i32malloc(nvtxs, "gk_graph_Dup: ivsizes"));
+  if (graph->vlabels)
+    copy->vlabels = gk_i32copy(nvtxs, graph->vlabels,
+                        gk_i32malloc(nvtxs, "gk_graph_Dup: ivlabels"));
+  if (graph->fvwgts)
+    copy->fvwgts = gk_fcopy(nvtxs, graph->fvwgts,
+                       gk_fmalloc(nvtxs, "gk_graph_Dup: fvwgts"));
+  if (graph->fvsizes)
+    copy->fvsizes = gk_fcopy(nvtxs, graph->fvsizes,
+                        gk_fmalloc(nvtxs, "gk_graph_Dup: fvsizes"));
+
+  /* per-edge arrays: xadj[nvtxs] entries each */
+  if (graph->adjncy)
+    copy->adjncy = gk_i32copy(graph->xadj[nvtxs], graph->adjncy,
+                       gk_i32malloc(graph->xadj[nvtxs], "gk_graph_Dup: adjncy"));
+  if (graph->iadjwgt)
+    copy->iadjwgt = gk_i32copy(graph->xadj[nvtxs], graph->iadjwgt,
+                        gk_i32malloc(graph->xadj[nvtxs], "gk_graph_Dup: iadjwgt"));
+  if (graph->fadjwgt)
+    copy->fadjwgt = gk_fcopy(graph->xadj[nvtxs], graph->fadjwgt,
+                        gk_fmalloc(graph->xadj[nvtxs], "gk_graph_Dup: fadjwgt"));
+
+  return copy;
+}
+
+
+/*************************************************************************/
+/*! Returns the transpose of a graph: each stored entry (u,v) becomes (v,u).
+    \param graph is the graph to be transposed.
+    \returns the newly created transposed graph.
+*/
+/**************************************************************************/
+gk_graph_t *gk_graph_Transpose(gk_graph_t *graph)
+{
+  int32_t u, nvtxs, *adjncy;
+  ssize_t e, slot, nedges, *xadj, *txadj;
+  gk_graph_t *tgraph;
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+  nedges = xadj[nvtxs];
+  tgraph = gk_graph_Create();
+  tgraph->nvtxs  = nvtxs;
+  tgraph->xadj   = txadj = gk_zsmalloc(nvtxs+1, 0, "gk_graph_Transpose: xadj");
+  tgraph->adjncy = gk_i32malloc(nedges, "gk_graph_Transpose: adjncy");
+  if (graph->iadjwgt)
+    tgraph->iadjwgt = gk_i32malloc(nedges, "gk_graph_Transpose: iadjwgt");
+  if (graph->fadjwgt)
+    tgraph->fadjwgt = gk_fmalloc(nedges, "gk_graph_Transpose: fadjwgt");
+
+  /* pass 1: count the entries that every row of the transpose will receive */
+  for (u=0; u<nvtxs; u++)
+    for (e=xadj[u]; e<xadj[u+1]; e++)
+      txadj[adjncy[e]]++;
+  MAKECSR(u, nvtxs, txadj);
+
+  /* pass 2: scatter; txadj[v] is used as the next free slot of row v */
+  for (u=0; u<nvtxs; u++) {
+    for (e=xadj[u]; e<xadj[u+1]; e++) {
+      slot = txadj[adjncy[e]]++;
+      tgraph->adjncy[slot] = u;
+      if (tgraph->iadjwgt)
+        tgraph->iadjwgt[slot] = graph->iadjwgt[e];
+      if (tgraph->fadjwgt)
+        tgraph->fadjwgt[slot] = graph->fadjwgt[e];
+    }
+  }
+  SHIFTCSR(u, nvtxs, txadj);
+
+  /* the vertex attributes do not depend on the edge direction: plain copies */
+  if (graph->ivwgts)
+    tgraph->ivwgts = gk_i32copy(nvtxs, graph->ivwgts,
+                         gk_i32malloc(nvtxs, "gk_graph_Transpose: ivwgts"));
+  if (graph->ivsizes)
+    tgraph->ivsizes = gk_i32copy(nvtxs, graph->ivsizes,
+                          gk_i32malloc(nvtxs, "gk_graph_Transpose: ivsizes"));
+  if (graph->vlabels)
+    tgraph->vlabels = gk_i32copy(nvtxs, graph->vlabels,
+                          gk_i32malloc(nvtxs, "gk_graph_Transpose: ivlabels"));
+  if (graph->fvwgts)
+    tgraph->fvwgts = gk_fcopy(nvtxs, graph->fvwgts,
+                         gk_fmalloc(nvtxs, "gk_graph_Transpose: fvwgts"));
+  if (graph->fvsizes)
+    tgraph->fvsizes = gk_fcopy(nvtxs, graph->fvsizes,
+                          gk_fmalloc(nvtxs, "gk_graph_Transpose: fvsizes"));
+
+  return tgraph;
+}
+
+
+/*************************************************************************/
+/*! Returns a subgraph containing a set of consecutive vertices.
+    \param graph is the original graph.
+    \param vstart is the starting vertex.
+    \param nvtxs is the number of vertices from vstart to extract.
+    \returns the newly created subgraph, or NULL if vstart or nvtxs is
+             negative or the range extends past the last vertex of graph.
+    \note Adjacency entries are copied as they are, without renumbering.
+*/
+/**************************************************************************/
+gk_graph_t *gk_graph_ExtractSubgraph(gk_graph_t *graph, int vstart, int nvtxs)
+{
+  ssize_t i, first, nedges;
+  gk_graph_t *ngraph;
+
+  /* written as a subtraction so that vstart+nvtxs cannot overflow */
+  if (vstart < 0 || nvtxs < 0 || nvtxs > graph->nvtxs - vstart)
+    return NULL;
+
+  ngraph = gk_graph_Create();
+  ngraph->nvtxs = nvtxs;
+
+  /* per-vertex attributes */
+  if (graph->ivwgts)
+    ngraph->ivwgts = gk_i32copy(nvtxs, graph->ivwgts+vstart, 
+                            gk_i32malloc(nvtxs, "gk_graph_ExtractSubgraph: ivwgts"));
+  if (graph->ivsizes)
+    ngraph->ivsizes = gk_i32copy(nvtxs, graph->ivsizes+vstart, 
+                            gk_i32malloc(nvtxs, "gk_graph_ExtractSubgraph: ivsizes"));
+  if (graph->vlabels)
+    ngraph->vlabels = gk_i32copy(nvtxs, graph->vlabels+vstart, 
+                            gk_i32malloc(nvtxs, "gk_graph_ExtractSubgraph: vlabels"));
+  if (graph->fvwgts)
+    ngraph->fvwgts = gk_fcopy(nvtxs, graph->fvwgts+vstart, 
+                            gk_fmalloc(nvtxs, "gk_graph_ExtractSubgraph: fvwgts"));
+  if (graph->fvsizes)
+    ngraph->fvsizes = gk_fcopy(nvtxs, graph->fvsizes+vstart, 
+                            gk_fmalloc(nvtxs, "gk_graph_ExtractSubgraph: fvsizes"));
+
+  /* a graph without xadj has no adjacency structure to extract */
+  if (graph->xadj == NULL)
+    return ngraph;
+
+  first  = graph->xadj[vstart];
+  nedges = graph->xadj[vstart+nvtxs] - first;
+
+  /* copy the row pointers and rebase them so that the subgraph starts at 0 */
+  ngraph->xadj = gk_zcopy(nvtxs+1, graph->xadj+vstart, 
+                          gk_zmalloc(nvtxs+1, "gk_graph_ExtractSubgraph: xadj"));
+  for (i=0; i<=nvtxs; i++)
+    ngraph->xadj[i] -= first;
+  ASSERT(ngraph->xadj[0] == 0 && ngraph->xadj[nvtxs] == nedges);
+
+  if (graph->adjncy)
+    ngraph->adjncy = gk_i32copy(nedges, graph->adjncy+first, 
+                            gk_i32malloc(nedges, "gk_graph_ExtractSubgraph: adjncy"));
+  if (graph->iadjwgt)
+    ngraph->iadjwgt = gk_i32copy(nedges, graph->iadjwgt+first, 
+                            gk_i32malloc(nedges, "gk_graph_ExtractSubgraph: iadjwgt"));
+  if (graph->fadjwgt)
+    ngraph->fadjwgt = gk_fcopy(nedges, graph->fadjwgt+first, 
+                            gk_fmalloc(nedges, "gk_graph_ExtractSubgraph: fadjwgt"));
+
+  return ngraph;
+}
+
+
+/*************************************************************************/
+/*! Returns a graph that has been reordered according to the permutation.
+    \param[IN] graph is the graph to be re-ordered.
+    \param[IN] perm maps a vertex of graph to its ID in the re-ordered graph.
+    \param[IN] iperm maps a vertex of the re-ordered graph to its ID in graph.
+    \returns the newly created re-ordered graph, or NULL if both perm and
+             iperm are NULL.
+
+    \note Either perm or iperm can be NULL but not both; the missing one is
+          derived from the other and released before returning.
+*/
+/**************************************************************************/
+gk_graph_t *gk_graph_Reorder(gk_graph_t *graph, int32_t *perm, int32_t *iperm)
+{
+  ssize_t e, ne, *xadj;
+  int i, oldv, newv, nvtxs;
+  int ownperm=0, owniperm=0;
+  int32_t *adjncy;
+  gk_graph_t *ngraph;
+
+  if (perm == NULL && iperm == NULL)
+    return NULL;
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+
+  ngraph = gk_graph_Create();
+  ngraph->nvtxs = nvtxs;
+
+  /* allocate only the arrays that are present in graph, starting with
+     the ones that have one entry per vertex (xadj has one more) */
+  if (graph->xadj)
+    ngraph->xadj = gk_zmalloc(nvtxs+1, "gk_graph_Reorder: xadj");
+  if (graph->ivwgts)
+    ngraph->ivwgts = gk_i32malloc(nvtxs, "gk_graph_Reorder: ivwgts");
+  if (graph->ivsizes)
+    ngraph->ivsizes = gk_i32malloc(nvtxs, "gk_graph_Reorder: ivsizes");
+  if (graph->vlabels)
+    ngraph->vlabels = gk_i32malloc(nvtxs, "gk_graph_Reorder: ivlabels");
+  if (graph->fvwgts)
+    ngraph->fvwgts = gk_fmalloc(nvtxs, "gk_graph_Reorder: fvwgts");
+  if (graph->fvsizes)
+    ngraph->fvsizes = gk_fmalloc(nvtxs, "gk_graph_Reorder: fvsizes");
+
+  /* ... followed by the ones that have one entry per adjacency entry */
+  if (graph->adjncy)
+    ngraph->adjncy = gk_i32malloc(graph->xadj[nvtxs], "gk_graph_Reorder: adjncy");
+  if (graph->iadjwgt)
+    ngraph->iadjwgt = gk_i32malloc(graph->xadj[nvtxs], "gk_graph_Reorder: iadjwgt");
+  if (graph->fadjwgt)
+    ngraph->fadjwgt = gk_fmalloc(graph->xadj[nvtxs], "gk_graph_Reorder: fadjwgt");
+
+  /* derive whichever of perm/iperm was not supplied by inverting the other */
+  if (perm == NULL) {
+    perm = gk_i32malloc(nvtxs, "gk_graph_Reorder: perm"); 
+    ownperm = 1;
+    for (i=0; i<nvtxs; i++)
+      perm[iperm[i]] = i;
+  }
+  if (iperm == NULL) {
+    iperm = gk_i32malloc(nvtxs, "gk_graph_Reorder: iperm"); 
+    owniperm = 1;
+    for (i=0; i<nvtxs; i++)
+      iperm[perm[i]] = i;
+  }
+
+  /* fill-in the re-ordered graph: its vertex newv receives everything that
+     belonged to vertex oldv of the original graph */
+  ngraph->xadj[0] = ne = 0;
+  for (newv=0; newv<nvtxs; newv++) {
+    oldv = iperm[newv];
+
+    /* adjacency list, with the neighbors translated to their new IDs */
+    for (e=xadj[oldv]; e<xadj[oldv+1]; e++, ne++) {
+      ngraph->adjncy[ne] = perm[adjncy[e]];
+      if (graph->iadjwgt)
+        ngraph->iadjwgt[ne] = graph->iadjwgt[e];
+      if (graph->fadjwgt)
+        ngraph->fadjwgt[ne] = graph->fadjwgt[e];
+    }
+    ngraph->xadj[newv+1] = ne;
+
+    /* vertex attributes */
+    if (graph->ivwgts)
+      ngraph->ivwgts[newv] = graph->ivwgts[oldv];
+    if (graph->fvwgts)
+      ngraph->fvwgts[newv] = graph->fvwgts[oldv];
+    if (graph->ivsizes)
+      ngraph->ivsizes[newv] = graph->ivsizes[oldv];
+    if (graph->fvsizes)
+      ngraph->fvsizes[newv] = graph->fvsizes[oldv];
+    if (graph->vlabels)
+      ngraph->vlabels[newv] = graph->vlabels[oldv];
+  }
+
+  /* release only the permutation arrays that were allocated here; the
+     ones supplied by the caller are left alone */
+  if (ownperm)
+    gk_free((void **)&perm, LTERM);
+  if (owniperm)
+    gk_free((void **)&iperm, LTERM);
+
+  return ngraph;
+}
+
+
+/*************************************************************************/
+/*! This function finds the connected components in a graph.
+
+    \param graph is the graph structure
+    \param cptr is the ptr structure of the CSR representation of the 
+           components. The length of this vector must be graph->nvtxs+1.
+    \param cind is the indices structure of the CSR representation of 
+           the components. The length of this vector must be graph->nvtxs.
+
+    \returns the number of components that it found.
+
+    \note The cptr and cind parameters can be NULL, in which case only the
+          number of connected components is returned. If only one of them
+          is NULL, nothing is written to the other one either.
+*/
+/*************************************************************************/
+int gk_graph_FindComponents(gk_graph_t *graph, int32_t *cptr, int32_t *cind)
+{
+  ssize_t i, j, k, nvtxs, first, last, ntodo, ncmps;
+  ssize_t *xadj;
+  int32_t *adjncy, *pos, *todo;
+  int32_t mustfree_ccsr=0;
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+
+  /* Deal with NULL supplied cptr/cind vectors. Both are replaced by scratch
+     arrays even if only one is missing, so that neither is left NULL. */
+  if (cptr == NULL || cind == NULL) {
+    cptr = gk_i32malloc(nvtxs+1, "gk_graph_FindComponents: cptr");
+    cind = gk_i32malloc(nvtxs, "gk_graph_FindComponents: cind");
+    mustfree_ccsr = 1;
+  }
+
+  /* The list of vertices that have not been touched yet. 
+     The valid entries are from [0..ntodo). */
+  todo = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "gk_graph_FindComponents: todo"));
+
+  /* For a vertex that has not been visited, pos[i] is the position in the
+     todo list that this vertex is stored. 
+     If a vertex has been visited, pos[i] = -1. */
+  pos = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "gk_graph_FindComponents: pos"));
+
+  /* Find the connected components */
+  ncmps = -1;
+  ntodo = nvtxs;     /* All vertices have not been visited */
+  first = last = 0;  /* Point to the first and last vertices that have been touched
+                        but not explored. 
+                        These vertices are stored in cind[first]...cind[last-1]. */
+  while (1) {
+    if (first == last) { /* Find another starting vertex */
+      cptr[++ncmps] = first;  /* Mark the end of the current CC */
+
+      if (ntodo > 0) {
+        /* put the first vertex in the todo list as the start of the new CC */
+        GKASSERT(pos[todo[0]] != -1);
+        k = cind[last++] = todo[0];
+        /* Fill the hole at todo[0] with the last todo entry and only then mark
+           k as visited: if k is that last entry, the move would undo the mark. */
+        todo[0] = todo[--ntodo];
+        pos[todo[0]] = 0;
+        pos[k] = -1;
+      }
+      else
+        break;
+    }
+
+    i = cind[first++];  /* Get the first visited but unexplored vertex */
+
+    for (j=xadj[i]; j<xadj[i+1]; j++) {
+      k = adjncy[j];
+      if (pos[k] != -1) {
+        cind[last++] = k;
+
+        /* Remove k from the todo list and put the last item in the todo
+           list at the position that k was so that the todo list will be
+           consecutive. The pos[] array is updated accordingly to keep track
+           the location of the vertices in the todo[] list. */
+        todo[pos[k]] = todo[--ntodo];
+        pos[todo[pos[k]]] = pos[k];
+        pos[k] = -1;
+      }
+    }
+  }
+  GKASSERT(first == nvtxs);
+
+  if (mustfree_ccsr)
+    gk_free((void **)&cptr, &cind, LTERM);
+  gk_free((void **)&pos, &todo, LTERM);
+
+  return (int) ncmps;
+}
+
+
+/*************************************************************************/
+/*! This function computes a permutation of the vertices based on a
+    breadth-first-traversal. It can be used for re-ordering the graph
+    to reduce its bandwidth for better cache locality.
+    The algorithm used is a simplified version of the method used to find
+    the connected components.
+
+    \param[IN]  graph is the graph structure
+    \param[IN]  v is the starting vertex of the BFS; it is used as an array
+                index without being range-checked.
+    \param[OUT] r_perm receives perm, where perm[i] stores the ID of vertex i
+                in the re-ordered graph.
+    \param[OUT] r_iperm receives iperm, where iperm[i] stores the ID of the
+                vertex that corresponds to the ith vertex in the re-ordered graph.
+
+    \note Either r_perm or r_iperm can be NULL, in which case that array is
+          freed instead of returned. Nothing is returned if graph->nvtxs <= 0.
+          The returned arrays should be freed with gk_free().
+*/
+/*************************************************************************/
+void gk_graph_ComputeBFSOrdering(gk_graph_t *graph, int v, int32_t **r_perm,
+          int32_t **r_iperm)
+{
+  ssize_t j, *xadj;
+  int i, k, nvtxs, first, last;
+  int32_t *adjncy, *cot, *pos;
+
+  if (graph->nvtxs <= 0)
+    return;
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+
+  /* pos[u] is the position of u within cot[] while u is unvisited and -1
+     afterwards (like pos + touched of the CC method).
+     NOTE(review): gk_i32incset presumably fills it with pos[i]=i; confirm. */
+  pos = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "gk_graph_ComputeBFSOrdering: pos"));
+
+  /* This array ([C]losed[O]pen[T]odo => cot) serves three purposes. 
+     Positions from [0...first) is the current iperm[] vector of the explored vertices; 
+     Positions from [first...last) is the OPEN list (i.e., visited vertices);
+     Positions from [last...nvtxs) is the todo list. */
+  cot = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "gk_graph_ComputeBFSOrdering: cot"));
+
+  /* put v at the front of the todo list by exchanging the entries of
+     vertices 0 and v in both arrays */
+  pos[0] = cot[0] = v;
+  pos[v] = cot[v] = 0;
+
+  /* compute a BFS ordering from the seed vertex */
+  first = last = 0;
+  while (first < nvtxs) {
+    if (first == last) { /* OPEN list is empty: take the head of the todo list */
+      k = cot[last];
+      ASSERT(pos[k] != -1);
+      pos[k] = -1; /* mark node as being visited */
+      last++;
+    }
+
+    i = cot[first++];  /* the ++ advances the explored vertices */
+    for (j=xadj[i]; j<xadj[i+1]; j++) {
+      k = adjncy[j];
+      /* if a node has already been visited, its pos[] will be -1 */
+      if (pos[k] != -1) {
+        /* pos[k] is the location within cot[] where k resides (it is in the 'todo' part); 
+           It is placed in that location cot[last] (end of OPEN list) that we 
+           are about to overwrite and update pos[cot[last]] to reflect that. */
+        cot[pos[k]]    = cot[last]; /* put the head of the todo list to 
+                                       where k was in the todo list */
+        pos[cot[last]] = pos[k];    /* update pos to reflect the move */
+
+        cot[last++] = k;  /* put node at the end of the OPEN list */
+        pos[k]      = -1; /* mark node as being visited */
+      }
+    }
+  }
+
+  /* time to decide what to return */
+  if (r_perm != NULL) {
+    /* use the 'pos' array to build the perm array */
+    for (i=0; i<nvtxs; i++)
+      pos[cot[i]] = i;
+
+    *r_perm = pos;
+    pos = NULL;
+  }
+
+  if (r_iperm != NULL) {
+    *r_iperm = cot;
+    cot = NULL;
+  }
+
+  /* cleanup memory: the pointers that were handed to the caller have been
+     set to NULL above, so only the arrays that were not returned are passed on */
+  gk_free((void **)&pos, &cot, LTERM);
+
+}
+
+
+/*************************************************************************/
+/*! This function computes a permutation of the vertices based on a
+    best-first-traversal. It can be used for re-ordering the graph
+    to reduce its bandwidth for better cache locality.
+
+    \param[IN]  graph is the graph structure.
+    \param[IN]  v is the starting vertex of the best-first traversal.
+    \param[IN]  type indicates the criteria to use to measure the 'bestness'
+                of a vertex.
+    \param[OUT] perm[i] stores the ID of vertex i in the re-ordered graph.
+    \param[OUT] iperm[i] stores the ID of the vertex that corresponds to 
+                the ith vertex in the re-ordered graph.
+
+    \note The perm or iperm (but not both) can be NULL, at which point, 
+          the corresponding arrays are not returned. Though the program
+          works fine when both are NULL, doing that is not smart.
+          The returned arrays should be freed with gk_free().
+*/
+/*************************************************************************/
+void gk_graph_ComputeBestFOrdering0(gk_graph_t *graph, int v, int type,
+          int32_t **r_perm, int32_t **r_iperm)
+{
+  ssize_t j, jj, *xadj;
+  int i, k, u, nvtxs;
+  int32_t *adjncy, *perm, *degrees, *minIDs, *open;
+  gk_i32pq_t *queue;
+
+  if (graph->nvtxs <= 0)
+    return; /* empty graph: nothing is allocated and neither output is written */
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+
+  /* degrees[u] counts the neighbors of u that are already in the closed list */
+  degrees = gk_i32smalloc(nvtxs, 0, "gk_graph_ComputeBestFOrdering: degrees");
+
+  /* the smallest order of a closed neighbor of an open vertex (written, never read) */
+  minIDs  = gk_i32smalloc(nvtxs, nvtxs+1, "gk_graph_ComputeBestFOrdering: minIDs");
+
+  /* the open list (only open[0] is ever written by this routine) */
+  open  = gk_i32malloc(nvtxs, "gk_graph_ComputeBestFOrdering: open");
+
+  /* if perm[i] >= 0, then perm[i] is the order of vertex i;
+     otherwise perm[i] == -1.
+  */
+  perm = gk_i32smalloc(nvtxs, -1, "gk_graph_ComputeBestFOrdering: perm");
+
+  /* create the queue and put everything in it; v gets the only non-zero key */
+  queue = gk_i32pqCreate(nvtxs);
+  for (i=0; i<nvtxs; i++)
+    gk_i32pqInsert(queue, i, 0);
+  gk_i32pqUpdate(queue, v, 1);
+
+  open[0] = v;
+
+  /* start processing the nodes; iteration i assigns order i to the vertex it pops */
+  for (i=0; i<nvtxs; i++) {
+    if ((v = gk_i32pqGetTop(queue)) == -1)
+      gk_errexit(SIGERR, "The priority queue got empty ahead of time [i=%d].\n", i);
+    if (perm[v] != -1)
+      gk_errexit(SIGERR, "The perm[%d] has already been set.\n", v);
+    perm[v] = i;
+
+
+    for (j=xadj[v]; j<xadj[v+1]; j++) {
+      u = adjncy[j];
+      if (perm[u] == -1) { /* only neighbors that are not ordered yet get re-keyed */
+        degrees[u]++;
+        minIDs[u] = (i < minIDs[u] ? i : minIDs[u]);
+
+        switch (type) {
+          case 1: /* DFS */
+            gk_i32pqUpdate(queue, u, 1);
+            break;
+          case 2: /* Max in closed degree */
+            gk_i32pqUpdate(queue, u, degrees[u]);
+            break;
+          case 3: /* Sum of orders in closed list */
+            for (k=0, jj=xadj[u]; jj<xadj[u+1]; jj++) {
+              if (perm[adjncy[jj]] != -1)
+                k += perm[adjncy[jj]];
+            }
+            gk_i32pqUpdate(queue, u, k);
+            break;
+          case 4: /* Sum of order-differences (w.r.t. current number) in closed
+                     list (updated once in a while) */
+            for (k=0, jj=xadj[u]; jj<xadj[u+1]; jj++) {
+              if (perm[adjncy[jj]] != -1)
+                k += (i-perm[adjncy[jj]]);
+            }
+            gk_i32pqUpdate(queue, u, k);
+            break;
+          default: /* any other type leaves the key of u unchanged */
+            ;
+        }
+      }
+    }
+  }
+
+
+  /* time to decide what to return; iperm is derived from perm, so it has to be
+     built before perm is handed off (and NULLed), or perm[i] would be a NULL read */
+  if (r_iperm != NULL) {
+    /* use the 'degrees' array to build the iperm array */
+    for (i=0; i<nvtxs; i++)
+      degrees[perm[i]] = i;
+
+    *r_iperm = degrees;
+    degrees = NULL;
+  }
+
+  if (r_perm != NULL) {
+    *r_perm = perm;
+    perm = NULL;
+  }
+
+
+  /* cleanup memory; the pointers handed to the caller were set to NULL above */
+  gk_i32pqDestroy(queue);
+  gk_free((void **)&perm, &degrees, &minIDs, &open, LTERM);
+
+}
+
+
+/*************************************************************************/
+/*! This function computes a permutation of the vertices based on a
+    best-first-traversal. It can be used for re-ordering the graph
+    to reduce its bandwidth for better cache locality.
+
+    \param[IN]  graph is the graph structure.
+    \param[IN]  v is the starting vertex of the best-first traversal.
+    \param[IN]  type indicates the criteria to use to measure the 'bestness'
+                of a vertex.
+    \param[OUT] perm[i] stores the ID of vertex i in the re-ordered graph.
+    \param[OUT] iperm[i] stores the ID of the vertex that corresponds to 
+                the ith vertex in the re-ordered graph.
+
+    \note The perm or iperm (but not both) can be NULL, at which point, 
+          the corresponding arrays are not returned. Though the program
+          works fine when both are NULL, doing that is not smart.
+          The returned arrays should be freed with gk_free().
+*/
+/*************************************************************************/
+void gk_graph_ComputeBestFOrdering(gk_graph_t *graph, int v, int type,
+          int32_t **r_perm, int32_t **r_iperm)
+{
+  ssize_t j, jj, *xadj;
+  int i, k, u, nvtxs, nopen, ntodo;
+  int32_t *adjncy, *perm, *degrees, *sod, *level, *ot, *pos;
+  int64_t *wdegrees;
+  gk_i32pq_t *queue;
+
+  if (graph->nvtxs <= 0)
+    return; /* empty graph: nothing is allocated and neither output is written */
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+
+  /* degrees[u] counts the neighbors of u that are already in the closed list */
+  degrees = gk_i32smalloc(nvtxs, 0, "gk_graph_ComputeBestFOrdering: degrees");
+
+  /* the weighted degree of the vertices in the closed list for type==3 */
+  wdegrees = gk_i64smalloc(nvtxs, 0, "gk_graph_ComputeBestFOrdering: wdegrees");
+
+  /* the sum of differences for type==4 */
+  sod = gk_i32smalloc(nvtxs, 0, "gk_graph_ComputeBestFOrdering: sod");
+
+  /* the encountering level of a vertex type==5 */
+  level = gk_i32smalloc(nvtxs, 0, "gk_graph_ComputeBestFOrdering: level");
+
+  /* The open+todo list of vertices.
+     The vertices from [0..nopen) are the open vertices.
+     The vertices from [nopen..ntodo) are the todo vertices.
+     */
+  ot = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "gk_graph_ComputeBestFOrdering: ot"));
+
+  /* For a vertex that has not been explored, pos[i] is the position in the ot list. */
+  pos = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "gk_graph_ComputeBestFOrdering: pos"));
+
+  /* if perm[i] >= 0, then perm[i] is the order of vertex i; otherwise perm[i] == -1. */
+  perm = gk_i32smalloc(nvtxs, -1, "gk_graph_ComputeBestFOrdering: perm");
+
+  /* create the queue and put the starting vertex in it */
+  queue = gk_i32pqCreate(nvtxs);
+  gk_i32pqInsert(queue, v, 1);
+
+  /* put v at the front of the open list by exchanging it with vertex 0 */
+  pos[0] = ot[0] = v;
+  pos[v] = ot[v] = 0;
+  nopen = 1;
+  ntodo = nvtxs;
+
+  /* start processing the nodes; iteration i assigns order i to the vertex it pops */
+  for (i=0; i<nvtxs; i++) {
+    if (nopen == 0) { /* deal with non-connected graphs */
+      gk_i32pqInsert(queue, ot[0], 1);
+      nopen++;
+    }
+
+    if ((v = gk_i32pqGetTop(queue)) == -1)
+      gk_errexit(SIGERR, "The priority queue got empty ahead of time [i=%d].\n", i);
+
+    if (perm[v] != -1)
+      gk_errexit(SIGERR, "The perm[%d] has already been set.\n", v);
+    perm[v] = i;
+
+    if (ot[pos[v]] != v)
+      gk_errexit(SIGERR, "Something went wrong [ot[pos[%d]]!=%d.\n", v, v);
+    if (pos[v] >= nopen)
+      gk_errexit(SIGERR, "The position of v is not in open list. pos[%d]=%d is >=%d.\n", v, pos[v], nopen);
+
+    /* remove v from the open list and re-arrange the todo part of the list */
+    ot[pos[v]]       = ot[nopen-1];
+    pos[ot[nopen-1]] = pos[v];
+    if (ntodo > nopen) {
+      ot[nopen-1]      = ot[ntodo-1];
+      pos[ot[ntodo-1]] = nopen-1;
+    }
+    nopen--;
+    ntodo--;
+
+    for (j=xadj[v]; j<xadj[v+1]; j++) {
+      u = adjncy[j];
+      if (perm[u] == -1) {
+        /* update ot list, if u is not in the open list by putting it at the end
+           of the open list. */
+        if (degrees[u] == 0) {
+          ot[pos[u]]     = ot[nopen];
+          pos[ot[nopen]] = pos[u];
+          ot[nopen]      = u;
+          pos[u]         = nopen;
+          nopen++;
+
+          level[u] = level[v]+1;
+          gk_i32pqInsert(queue, u, 0);
+        }
+
+
+        /* update the in-closed degree */
+        degrees[u]++;
+
+        /* update the queues based on the type */
+        switch (type) {
+          case 1: /* DFS */
+            gk_i32pqUpdate(queue, u, 1000*(i+1)+degrees[u]);
+            break;
+
+          case 2: /* Max in closed degree */
+            gk_i32pqUpdate(queue, u, degrees[u]);
+            break;
+
+          case 3: /* Sum of orders in closed list */
+            wdegrees[u] += i;
+            gk_i32pqUpdate(queue, u, (int32_t)sqrt(wdegrees[u]));
+            break;
+
+          case 4: /* Sum of order-differences */
+            /* this is handled at the end of the loop */
+            ;
+            break;
+
+          case 5: /* BFS with in degree priority */
+            gk_i32pqUpdate(queue, u, -(1000*level[u] - degrees[u]));
+            break;
+
+          case 6: /* Hybrid of 1+2 */
+            gk_i32pqUpdate(queue, u, (i+1)*degrees[u]);
+            break;
+
+          default: /* any other type leaves the key of u unchanged */
+            ;
+        }
+      }
+    }
+
+    if (type == 4) { /* update all the vertices in the open list */
+      for (j=0; j<nopen; j++) {
+        u = ot[j];
+        if (perm[u] != -1)
+          gk_errexit(SIGERR, "For i=%d, the open list contains a closed vertex: ot[%zd]=%d, perm[%d]=%d.\n", i, j, u, u, perm[u]);
+        sod[u] += degrees[u];
+        if (i<1000 || i%25==0) /* the queue keys are refreshed only periodically */
+          gk_i32pqUpdate(queue, u, sod[u]);
+      }
+    }
+
+    /*
+    for (j=0; j<ntodo; j++) {
+      if (pos[ot[j]] != j)
+        gk_errexit(SIGERR, "pos[ot[%zd]] != %zd.\n", j, j);
+    }
+    */
+
+  }
+
+
+  /* time to decide what to return; iperm is derived from perm, so it has to be
+     built before perm is handed off (and NULLed), or perm[i] would be a NULL read */
+  if (r_iperm != NULL) {
+    /* use the 'degrees' array to build the iperm array */
+    for (i=0; i<nvtxs; i++)
+      degrees[perm[i]] = i;
+
+    *r_iperm = degrees;
+    degrees = NULL;
+  }
+
+  if (r_perm != NULL) {
+    *r_perm = perm;
+    perm = NULL;
+  }
+
+
+  /* cleanup memory; the pointers handed to the caller were set to NULL above */
+  gk_i32pqDestroy(queue);
+  gk_free((void **)&perm, &degrees, &wdegrees, &sod, &ot, &pos, &level, LTERM);
+
+}
+
+
+/*************************************************************************/
+/*! This function computes the single-source shortest path lengths from the
+    root node to all the other nodes in the graph. If the graph is not
+    connected, then the shortest path to the vertices in the other components
+    is -1.
+
+    \param[IN]  graph is the graph structure. Its edge weights are assumed to
+                be non-negative, as a negative length means "not reached".
+    \param[IN]  v is the root of the single-source shortest path computations.
+    \param[OUT] r_sps receives the array sps[], where sps[i] stores the length
+                of the shortest path from v to vertex i, or -1 if no such path
+                exists. The array holds int32_t values when iadjwgt is non-NULL
+                and floats otherwise, in which case fadjwgt must be non-NULL.
+                Nothing is stored in r_sps when the graph has no vertices.
+
+    \note The returned array should be freed with gk_free().
+    \note The edge cursor is an ssize_t, matching xadj[], so that graphs with
+          more than INT_MAX edges are traversed correctly.
+*/
+/*************************************************************************/
+void gk_graph_SingleSourceShortestPaths(gk_graph_t *graph, int v, void **r_sps)
+{
+  ssize_t i, *xadj;
+  int u, nvtxs;
+  int32_t *adjncy, *inqueue;
+
+  if (graph->nvtxs <= 0)
+    return;
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+
+  /* inqueue[u]: 0 => never reached, 1 => currently in the queue, 2 => finalized */
+  inqueue = gk_i32smalloc(nvtxs, 0, "gk_graph_SingleSourceShortestPaths: inqueue");
+
+  /* determine if you will be computing using int32_t or float and proceed from there */
+  if (graph->iadjwgt != NULL) {
+    gk_i32pq_t *queue;
+    int32_t *adjwgt;
+    int32_t *sps;
+
+    adjwgt = graph->iadjwgt;
+
+    queue = gk_i32pqCreate(nvtxs);
+    gk_i32pqInsert(queue, v, 0);
+    inqueue[v] = 1;
+
+    sps = gk_i32smalloc(nvtxs, -1, "gk_graph_SingleSourceShortestPaths: sps");
+    sps[v] = 0;
+
+    /* start processing the nodes; GetTop returns -1 once the queue is empty */
+    while ((v = gk_i32pqGetTop(queue)) != -1) {
+      inqueue[v] = 2;
+
+      /* relax the adjacent edges */
+      for (i=xadj[v]; i<xadj[v+1]; i++) {
+        u = adjncy[i];
+        if (inqueue[u] == 2)
+          continue;
+
+        if (sps[u] < 0 || sps[v]+adjwgt[i] < sps[u]) {
+          sps[u] = sps[v]+adjwgt[i];
+
+          /* the key is the negated distance (presumably GetTop pops the largest key) */
+          if (inqueue[u])
+            gk_i32pqUpdate(queue, u, -sps[u]);
+          else {
+            gk_i32pqInsert(queue, u, -sps[u]);
+            inqueue[u] = 1;
+          }
+        }
+      }
+    }
+
+    *r_sps = (void *)sps;
+
+    gk_i32pqDestroy(queue);
+  }
+  else { /* same algorithm on the float weights; fadjwgt is read without a NULL check */
+    gk_fpq_t *queue;
+    float *adjwgt;
+    float *sps;
+
+    adjwgt = graph->fadjwgt;
+
+    queue = gk_fpqCreate(nvtxs);
+    gk_fpqInsert(queue, v, 0);
+    inqueue[v] = 1;
+
+    sps = gk_fsmalloc(nvtxs, -1, "gk_graph_SingleSourceShortestPaths: sps");
+    sps[v] = 0;
+
+    /* start processing the nodes; GetTop returns -1 once the queue is empty */
+    while ((v = gk_fpqGetTop(queue)) != -1) {
+      inqueue[v] = 2;
+
+      /* relax the adjacent edges */
+      for (i=xadj[v]; i<xadj[v+1]; i++) {
+        u = adjncy[i];
+        if (inqueue[u] == 2)
+          continue;
+
+        if (sps[u] < 0 || sps[v]+adjwgt[i] < sps[u]) {
+          sps[u] = sps[v]+adjwgt[i];
+
+          if (inqueue[u])
+            gk_fpqUpdate(queue, u, -sps[u]);
+          else {
+            gk_fpqInsert(queue, u, -sps[u]);
+            inqueue[u] = 1;
+          }
+        }
+      }
+    }
+
+    *r_sps = (void *)sps;
+
+    gk_fpqDestroy(queue);
+  }
+
+  gk_free((void **)&inqueue, LTERM);
+
+}
+
+
+/*************************************************************************/
+/*! Sorts the adjacency lists in increasing vertex order
+    \param graph the graph itself,
+*/
+/**************************************************************************/
+void gk_graph_SortAdjacencies(gk_graph_t *graph)
+{
+  int32_t nvtxs, nn=0; /* nn: length of the longest adjacency list (computed below) */
+  ssize_t *xadj;
+  int32_t *adjncy;
+  int32_t *iadjwgt;
+  float *fadjwgt;
+
+  nvtxs   = graph->nvtxs;
+  xadj    = graph->xadj;
+  adjncy  = graph->adjncy;
+  iadjwgt = graph->iadjwgt;
+  fadjwgt = graph->fadjwgt;
+
+  #pragma omp parallel if (nvtxs > 100)
+  {
+    ssize_t i, j, k;
+    gk_ikv_t *cand;
+    int32_t *itwgts=NULL;
+    float *ftwgts=NULL;
+    /* nn is declared outside the region; it is computed once, inside 'single' */
+    #pragma omp single
+    for (i=0; i<nvtxs; i++) 
+      nn = gk_max(nn, xadj[i+1]-xadj[i]);
+    /* scratch buffers declared inside the parallel region, sized for the longest list */
+    cand   = gk_ikvmalloc(nn, "gk_graph_SortIndices: cand");
+    if (iadjwgt)
+      itwgts = gk_i32malloc(nn, "gk_graph_SortIndices: itwgts");
+    if (fadjwgt)
+      ftwgts = gk_fmalloc(nn, "gk_graph_SortIndices: ftwgts");
+    /* per vertex: copy its list into cand[] and sort it only if an inversion was seen */
+    #pragma omp for schedule(static)
+    for (i=0; i<nvtxs; i++) {
+      for (k=0, j=xadj[i]; j<xadj[i+1]; j++) {
+        if (j > xadj[i] && adjncy[j] < adjncy[j-1])
+          k = 1; /* an inversion */
+        cand[j-xadj[i]].val = (int32_t)(j-xadj[i]); /* original offset within the list */
+        cand[j-xadj[i]].key = adjncy[j];
+        if (itwgts)
+          itwgts[j-xadj[i]] = iadjwgt[j];
+        if (ftwgts)
+          ftwgts[j-xadj[i]] = fadjwgt[j];
+      }
+      if (k) {
+        gk_ikvsorti(xadj[i+1]-xadj[i], cand);
+        for (j=xadj[i]; j<xadj[i+1]; j++) {
+          adjncy[j] = cand[j-xadj[i]].key;
+          if (itwgts) /* .val maps the sorted entry back to the weight saved above */
+            iadjwgt[j] = itwgts[cand[j-xadj[i]].val];
+          if (ftwgts)
+            fadjwgt[j] = ftwgts[cand[j-xadj[i]].val];
+        }
+      }
+    }
+
+    gk_free((void **)&cand, &itwgts, &ftwgts, LTERM);
+  }
+}
+
+
+/*************************************************************************/
+/*! Returns a symmetric version of a graph. The symmetric version
+    is constructed by applying an A op A^T operation, where op is one of
+    GK_CSR_SYM_SUM, GK_CSR_SYM_MIN, GK_CSR_SYM_MAX, GK_CSR_SYM_AVG.
+
+    \param graph the graph to be symmetrized,
+    \param op indicates the operation to be performed. The possible values are
+           GK_CSR_SYM_SUM, GK_CSR_SYM_MIN, GK_CSR_SYM_MAX, and GK_CSR_SYM_AVG.
+
+    \returns the symmetrized graph (xadj, adjncy and any edge weights), which
+          is newly allocated. The input graph is not modified.
+
+TODO: Need to deal with all the vertex attributes that currently do not get
+      copied over.
+*/
+/**************************************************************************/
+gk_graph_t *gk_graph_MakeSymmetric(gk_graph_t *graph, int op)
+{
+  ssize_t i, j, k, nnz;
+  int nrows, nadj, hasvals;
+  ssize_t *rowptr, *colptr, *nrowptr;
+  int *rowind, *colind, *nrowind, *marker, *ids;
+  float *rowval=NULL, *colval=NULL, *nrowval=NULL, *wgts=NULL;
+  int32_t *irowval=NULL, *icolval=NULL, *nirowval=NULL, *iwgts=NULL;
+  gk_graph_t *ngraph;
+
+  hasvals = (graph->iadjwgt != NULL || graph->fadjwgt != NULL);
+
+  nrows  = graph->nvtxs;
+  rowptr = graph->xadj;
+  rowind = graph->adjncy;
+  if (hasvals) {
+    irowval = graph->iadjwgt;
+     rowval = graph->fadjwgt;
+  }
+
+  /* create the column view (i.e., the in-edges of each vertex) for efficient processing */
+  colptr = gk_zsmalloc(nrows+1, 0, "colptr");
+  colind = gk_i32malloc(rowptr[nrows], "colind");
+  if (hasvals) {
+    if (rowval)
+      colval = gk_fmalloc(rowptr[nrows], "colval");
+    if (irowval)
+      icolval = gk_i32malloc(rowptr[nrows], "icolval");
+  }
+
+  for (i=0; i<nrows; i++) {
+    for (j=rowptr[i]; j<rowptr[i+1]; j++)
+      colptr[rowind[j]]++;
+  }
+  MAKECSR(i, nrows, colptr);
+
+  for (i=0; i<nrows; i++) {
+    for (j=rowptr[i]; j<rowptr[i+1]; j++) {
+      colind[colptr[rowind[j]]] = i;
+      if (hasvals) {
+        if (rowval)
+          colval[colptr[rowind[j]]] = rowval[j];
+        if (irowval)
+          icolval[colptr[rowind[j]]] = irowval[j];
+      }
+      colptr[rowind[j]]++;
+    }
+  }
+  SHIFTCSR(i, nrows, colptr);
+
+
+  ngraph = gk_graph_Create();
+  ngraph->nvtxs = graph->nvtxs;
+
+  /* the output arrays belong to ngraph; the input graph's arrays are only read */
+  nrowptr = ngraph->xadj = gk_zmalloc(nrows+1, "gk_csr_MakeSymmetric: nrowptr");
+  nrowind = ngraph->adjncy = gk_imalloc(2*rowptr[nrows], "gk_csr_MakeSymmetric: nrowind");
+  if (hasvals) {
+    if (rowval)
+      nrowval = ngraph->fadjwgt = gk_fmalloc(2*rowptr[nrows], "gk_csr_MakeSymmetric: nrowval");
+    if (irowval)
+      nirowval = ngraph->iadjwgt = gk_i32malloc(2*rowptr[nrows], "gk_csr_MakeSymmetric: nrowval");
+  }
+
+  marker = gk_ismalloc(nrows, -1, "marker"); /* marker[c]: slot of c in ids[], or -1 */
+  ids    = gk_imalloc(nrows, "ids");         /* the neighbors collected for the current row */
+  if (hasvals) {
+    if (rowval)
+      wgts = gk_fmalloc(nrows, "wgts");
+    if (irowval)
+      iwgts = gk_i32malloc(nrows, "wgts");
+  }
+
+  nrowptr[0] = nnz = 0;
+  for (i=0; i<nrows; i++) {
+    nadj = 0;
+    /* out-edges; for AVG each direction contributes half of its weight */
+    for (j=rowptr[i]; j<rowptr[i+1]; j++) {
+      ids[nadj] = rowind[j];
+      if (wgts)
+        wgts[nadj] = (op == GK_CSR_SYM_AVG ? 0.5*rowval[j] : rowval[j]);
+      if (iwgts)
+        iwgts[nadj] = (op == GK_CSR_SYM_AVG ? 0.5*irowval[j] : irowval[j]);
+      marker[rowind[j]] = nadj++;
+    }
+
+    /* in-edges */
+    for (j=colptr[i]; j<colptr[i+1]; j++) {
+      if (marker[colind[j]] == -1) { /* in-edge only: kept by every op except MIN */
+        if (op != GK_CSR_SYM_MIN) {
+          ids[nadj] = colind[j];
+          if (wgts)
+            wgts[nadj] = (op == GK_CSR_SYM_AVG ? 0.5*colval[j] : colval[j]);
+          if (iwgts)
+            iwgts[nadj] = (op == GK_CSR_SYM_AVG ? 0.5*icolval[j] : icolval[j]);
+          nadj++;
+        }
+      }
+      else { /* the edge exists in both directions: combine the two weights */
+        if (wgts) {
+          switch (op) {
+            case GK_CSR_SYM_MAX:
+              wgts[marker[colind[j]]] = gk_max(colval[j], wgts[marker[colind[j]]]);
+              break;
+            case GK_CSR_SYM_MIN:
+              wgts[marker[colind[j]]] = gk_min(colval[j], wgts[marker[colind[j]]]);
+              break;
+            case GK_CSR_SYM_SUM:
+              wgts[marker[colind[j]]] += colval[j];
+              break;
+            case GK_CSR_SYM_AVG: /* the slot already holds half of the out-edge weight */
+              wgts[marker[colind[j]]] += 0.5*colval[j];
+              break;
+            default:
+              errexit("Unsupported op for MakeSymmetric!\n");
+          }
+        }
+        if (iwgts) {
+          switch (op) {
+            case GK_CSR_SYM_MAX:
+              iwgts[marker[colind[j]]] = gk_max(icolval[j], iwgts[marker[colind[j]]]);
+              break;
+            case GK_CSR_SYM_MIN:
+              iwgts[marker[colind[j]]] = gk_min(icolval[j], iwgts[marker[colind[j]]]);
+              break;
+            case GK_CSR_SYM_SUM:
+              iwgts[marker[colind[j]]] += icolval[j];
+              break;
+            case GK_CSR_SYM_AVG: /* integer weights only: wgts[] may be NULL here */
+              iwgts[marker[colind[j]]] += 0.5*icolval[j];
+              break;
+            default:
+              errexit("Unsupported op for MakeSymmetric!\n");
+          }
+        }
+        marker[colind[j]] = -1; /* matched; the pass below will skip this out-edge */
+      }
+    }
+
+    /* go over out edges again to resolve any edges that were not found in the in
+     * edges; for MIN such one-directional edges are dropped from the row */
+    for (j=rowptr[i]; j<rowptr[i+1]; j++) {
+      if (marker[rowind[j]] != -1) {
+        if (op == GK_CSR_SYM_MIN)
+          ids[marker[rowind[j]]] = -1;
+        marker[rowind[j]] = -1;
+      }
+    }
+
+    /* put the non '-1' entries in ids[] into i's row */
+    for (j=0; j<nadj; j++) {
+      if (ids[j] != -1) {
+        nrowind[nnz] = ids[j];
+        if (wgts)
+          nrowval[nnz] = wgts[j];
+        if (iwgts)
+          nirowval[nnz] = iwgts[j];
+        nnz++;
+      }
+    }
+    nrowptr[i+1] = nnz;
+  }
+
+  gk_free((void **)&colptr, &colind, &colval, &icolval, &marker, &ids, &wgts, &iwgts, LTERM);
+
+  return ngraph;
+}
+
+
+
+#ifdef XXX
+
+/*************************************************************************/
+/*! Returns a subgraph containing a certain set of rows.
+    \param graph is the original graph.
+    \param nrows is the number of rows to extract.
+    \param rind is the set of row numbers to extract.
+    \returns the row structure of the newly created subgraph.
+*/
+/**************************************************************************/
+gk_graph_t *gk_graph_ExtractRows(gk_graph_t *graph, int nrows, int *rind)
+{
+  ssize_t i, ii, j, nnz;
+  gk_graph_t *ngraph;
+  /* NOTE(review): only built when XXX is defined; uses CSR-style members (nrows, rowptr, ...) */
+  ngraph = gk_graph_Create();
+
+  ngraph->nrows = nrows;
+  ngraph->ncols = graph->ncols;
+
+  for (nnz=0, i=0; i<nrows; i++)  
+    nnz += graph->rowptr[rind[i]+1]-graph->rowptr[rind[i]]; /* total entries of the selected rows */
+
+  ngraph->rowptr = gk_zmalloc(ngraph->nrows+1, "gk_graph_ExtractPartition: rowptr");
+  ngraph->rowind = gk_imalloc(nnz, "gk_graph_ExtractPartition: rowind");
+  ngraph->rowval = gk_fmalloc(nnz, "gk_graph_ExtractPartition: rowval");
+
+  ngraph->rowptr[0] = 0;
+  for (nnz=0, j=0, ii=0; ii<nrows; ii++) { /* copy the rows in the order given by rind[] */
+    i = rind[ii];
+    gk_icopy(graph->rowptr[i+1]-graph->rowptr[i], graph->rowind+graph->rowptr[i], ngraph->rowind+nnz);
+    gk_fcopy(graph->rowptr[i+1]-graph->rowptr[i], graph->rowval+graph->rowptr[i], ngraph->rowval+nnz);
+    nnz += graph->rowptr[i+1]-graph->rowptr[i];
+    ngraph->rowptr[++j] = nnz;
+  }
+  ASSERT(j == ngraph->nrows);
+
+  return ngraph;
+}
+
+
+/*************************************************************************/
+/*! Returns a subgraph corresponding to a specified partitioning of rows.
+    \param graph is the original graph.
+    \param part is the partitioning vector of the rows.
+    \param pid is the partition ID that will be extracted.
+    \returns the row structure of the newly created subgraph.
+*/
+/**************************************************************************/
+gk_graph_t *gk_graph_ExtractPartition(gk_graph_t *graph, int *part, int pid)
+{
+  ssize_t i, j, nnz;
+  gk_graph_t *ngraph;
+  /* NOTE(review): only built when XXX is defined; uses CSR-style members (nrows, rowptr, ...) */
+  ngraph = gk_graph_Create();
+
+  ngraph->nrows = 0;
+  ngraph->ncols = graph->ncols;
+
+  for (nnz=0, i=0; i<graph->nrows; i++) { /* first pass: count the rows and entries to keep */
+    if (part[i] == pid) {
+      ngraph->nrows++;
+      nnz += graph->rowptr[i+1]-graph->rowptr[i];
+    }
+  }
+
+  ngraph->rowptr = gk_zmalloc(ngraph->nrows+1, "gk_graph_ExtractPartition: rowptr");
+  ngraph->rowind = gk_imalloc(nnz, "gk_graph_ExtractPartition: rowind");
+  ngraph->rowval = gk_fmalloc(nnz, "gk_graph_ExtractPartition: rowval");
+
+  ngraph->rowptr[0] = 0;
+  for (nnz=0, j=0, i=0; i<graph->nrows; i++) { /* second pass: copy the kept rows */
+    if (part[i] == pid) {
+      gk_icopy(graph->rowptr[i+1]-graph->rowptr[i], graph->rowind+graph->rowptr[i], ngraph->rowind+nnz);
+      gk_fcopy(graph->rowptr[i+1]-graph->rowptr[i], graph->rowval+graph->rowptr[i], ngraph->rowval+nnz);
+      nnz += graph->rowptr[i+1]-graph->rowptr[i];
+      ngraph->rowptr[++j] = nnz;
+    }
+  }
+  ASSERT(j == ngraph->nrows);
+
+  return ngraph;
+}
+
+
+/*************************************************************************/
+/*! Splits the graph into multiple sub-graphs based on the provided
+    color array.
+    \param graph is the original graph.
+    \param color is an array of size equal to the number of non-zeros
+           in the graph (row-wise structure). The graph is split into
+           as many parts as the number of colors. For meaningful results,
+           the colors should be numbered consecutively starting from 0.
+    \returns an array of graphs, one for each supplied color number.
+*/
+/**************************************************************************/
+gk_graph_t **gk_graph_Split(gk_graph_t *graph, int *color)
+{
+  ssize_t i, j;
+  int nrows, ncolors;
+  ssize_t *rowptr;
+  int *rowind;
+  float *rowval;
+  gk_graph_t **sgraphs;
+  /* NOTE(review): only built when XXX is defined; uses CSR-style members (nrows, rowptr, ...) */
+  nrows  = graph->nrows;
+  rowptr = graph->rowptr;
+  rowind = graph->rowind;
+  rowval = graph->rowval;
+
+  ncolors = gk_imax(rowptr[nrows], color)+1; /* NOTE(review): presumably the largest color -- confirm gk_imax */
+
+  sgraphs = (gk_graph_t **)gk_malloc(sizeof(gk_graph_t *)*ncolors, "gk_graph_Split: sgraphs");
+  for (i=0; i<ncolors; i++) {
+    sgraphs[i] = gk_graph_Create();
+    sgraphs[i]->nrows  = graph->nrows;
+    sgraphs[i]->ncols  = graph->ncols;
+    sgraphs[i]->rowptr = gk_zsmalloc(nrows+1, 0, "gk_graph_Split: sgraphs[i]->rowptr"); 
+  }
+
+  for (i=0; i<nrows; i++) { /* count, per color, the entries that fall in each row */
+    for (j=rowptr[i]; j<rowptr[i+1]; j++) 
+      sgraphs[color[j]]->rowptr[i]++;
+  }
+  for (i=0; i<ncolors; i++) 
+    MAKECSR(j, nrows, sgraphs[i]->rowptr);
+
+  for (i=0; i<ncolors; i++) {
+    sgraphs[i]->rowind = gk_imalloc(sgraphs[i]->rowptr[nrows], "gk_graph_Split: sgraphs[i]->rowind"); 
+    sgraphs[i]->rowval = gk_fmalloc(sgraphs[i]->rowptr[nrows], "gk_graph_Split: sgraphs[i]->rowval"); 
+  }
+
+  for (i=0; i<nrows; i++) { /* scatter every entry into the sub-graph of its color */
+    for (j=rowptr[i]; j<rowptr[i+1]; j++) {
+      sgraphs[color[j]]->rowind[sgraphs[color[j]]->rowptr[i]] = rowind[j];
+      sgraphs[color[j]]->rowval[sgraphs[color[j]]->rowptr[i]] = rowval[j];
+      sgraphs[color[j]]->rowptr[i]++;
+    }
+  }
+
+  for (i=0; i<ncolors; i++) 
+    SHIFTCSR(j, nrows, sgraphs[i]->rowptr);
+
+  return sgraphs;
+}
+
+
+/*************************************************************************/
+/*! Prunes certain rows/columns of the graph. The pruning takes place
+    by analyzing the row structure of the graph. The pruning takes place
+    by removing rows/columns but it does not affect the numbering of the
+    remaining rows/columns.
+
+    \param graph the graph to be pruned,
+    \param what indicates if the rows (GK_CSR_ROW) or the columns (GK_CSR_COL)
+           of the graph will be pruned,
+    \param minf is the minimum number of rows (columns) that a column (row) must
+           be present in order to be kept,
+    \param maxf is the maximum number of rows (columns) that a column (row) must
+          be present at in order to be kept.
+    \returns the pruned graph consisting only of its row-based structure.
+          The input graph is not modified.
+*/
+/**************************************************************************/
+gk_graph_t *gk_graph_Prune(gk_graph_t *graph, int what, int minf, int maxf)
+{
+  ssize_t i, j, nnz;
+  int nrows, ncols;
+  ssize_t *rowptr, *nrowptr;
+  int *rowind, *nrowind, *collen;
+  float *rowval, *nrowval;
+  gk_graph_t *ngraph;
+  /* NOTE(review): only built when XXX is defined; uses CSR-style members (nrows, rowptr, ...) */
+  ngraph = gk_graph_Create();
+  
+  nrows = ngraph->nrows = graph->nrows;
+  ncols = ngraph->ncols = graph->ncols;
+
+  rowptr = graph->rowptr;
+  rowind = graph->rowind;
+  rowval = graph->rowval;
+
+  nrowptr = ngraph->rowptr = gk_zmalloc(nrows+1, "gk_graph_Prune: nrowptr");
+  nrowind = ngraph->rowind = gk_imalloc(rowptr[nrows], "gk_graph_Prune: nrowind");
+  nrowval = ngraph->rowval = gk_fmalloc(rowptr[nrows], "gk_graph_Prune: nrowval");
+
+
+  switch (what) {
+    case GK_CSR_COL:
+      collen = gk_ismalloc(ncols, 0, "gk_graph_Prune: collen");
+
+      for (i=0; i<nrows; i++) { /* collen[c] = number of rows in which column c appears */
+        for (j=rowptr[i]; j<rowptr[i+1]; j++) {
+          ASSERT(rowind[j] < ncols);
+          collen[rowind[j]]++;
+        }
+      }
+      for (i=0; i<ncols; i++) /* turn the counts into keep (1) / drop (0) flags */
+        collen[i] = (collen[i] >= minf && collen[i] <= maxf ? 1 : 0);
+
+      nrowptr[0] = 0;
+      for (nnz=0, i=0; i<nrows; i++) {
+        for (j=rowptr[i]; j<rowptr[i+1]; j++) {
+          if (collen[rowind[j]]) {
+            nrowind[nnz] = rowind[j];
+            nrowval[nnz] = rowval[j];
+            nnz++;
+          }
+        }
+        nrowptr[i+1] = nnz;
+      }
+      gk_free((void **)&collen, LTERM);
+      break;
+
+    case GK_CSR_ROW:
+      nrowptr[0] = 0;
+      for (nnz=0, i=0; i<nrows; i++) { /* a row outside [minf, maxf] is kept but left empty */
+        if (rowptr[i+1]-rowptr[i] >= minf && rowptr[i+1]-rowptr[i] <= maxf) {
+          for (j=rowptr[i]; j<rowptr[i+1]; j++, nnz++) {
+            nrowind[nnz] = rowind[j];
+            nrowval[nnz] = rowval[j];
+          }
+        }
+        nrowptr[i+1] = nnz;
+      }
+      break;
+
+    default:
+      gk_graph_Free(&ngraph);
+      gk_errexit(SIGERR, "Unknown prunning type of %d\n", what);
+      return NULL;
+  }
+
+  return ngraph;
+}
+
+
+
+/*************************************************************************/
+/*! Normalizes the rows/columns of the graph to be unit
+    length. A norm other than 1 or 2 leaves the values unchanged.
+    \param graph the graph itself,
+    \param what indicates what will be normalized and is obtained by
+           specifying GK_CSR_ROW, GK_CSR_COL, GK_CSR_ROW|GK_CSR_COL. 
+    \param norm indicates what norm is to normalize to, 1: 1-norm, 2: 2-norm
+*/
+/**************************************************************************/
+void gk_graph_Normalize(gk_graph_t *graph, int what, int norm)
+{
+  ssize_t i, j;
+  int n;
+  ssize_t *ptr;
+  float *val, sum;
+  /* NOTE(review): only built when XXX is defined; uses CSR-style members (nrows, rowptr, ...) */
+  if (what&GK_CSR_ROW && graph->rowval) {
+    n   = graph->nrows;
+    ptr = graph->rowptr;
+    val = graph->rowval;
+
+    #pragma omp parallel if (ptr[n] > OMPMINOPS) 
+    {
+      #pragma omp for private(j,sum) schedule(static)
+      for (i=0; i<n; i++) {
+        for (sum=0.0, j=ptr[i]; j<ptr[i+1]; j++){
+  	if (norm == 2)
+  	  sum += val[j]*val[j];
+  	else if (norm == 1)
+  	  sum += val[j]; /* assume val[j] > 0 */ 
+        }
+        if (sum > 0) { /* rows whose accumulated sum is not positive are left as they are */
+  	if (norm == 2)
+  	  sum=1.0/sqrt(sum); 
+  	else if (norm == 1)
+  	  sum=1.0/sum; 
+          for (j=ptr[i]; j<ptr[i+1]; j++)
+            val[j] *= sum;
+  	
+        }
+      }
+    }
+  }
+
+  if (what&GK_CSR_COL && graph->colval) { /* same scaling, applied to the column view */
+    n   = graph->ncols;
+    ptr = graph->colptr;
+    val = graph->colval;
+
+    #pragma omp parallel if (ptr[n] > OMPMINOPS)
+    {
+    #pragma omp for private(j,sum) schedule(static)
+      for (i=0; i<n; i++) {
+        for (sum=0.0, j=ptr[i]; j<ptr[i+1]; j++)
+  	if (norm == 2)
+  	  sum += val[j]*val[j];
+  	else if (norm == 1)
+  	  sum += val[j]; 
+        if (sum > 0) {
+  	if (norm == 2)
+  	  sum=1.0/sqrt(sum); 
+  	else if (norm == 1)
+  	  sum=1.0/sum; 
+          for (j=ptr[i]; j<ptr[i+1]; j++)
+            val[j] *= sum;
+        }
+      }
+    }
+  }
+}
+
+
+#endif
diff --git a/htable.c b/htable.c
new file mode 100644
index 0000000..078e114
--- /dev/null
+++ b/htable.c
@@ -0,0 +1,247 @@
+/*
+ * Copyright 2004, Regents of the University of Minnesota
+ *
+ * This file contains routines for manipulating a direct-access hash table
+ *
+ * Started 3/22/04
+ * George
+ *
+ */
+
+#include <GKlib.h>
+
+/******************************************************************************
+* This function allocates a hash-table with nelements slots, all marked empty
+*******************************************************************************/
+gk_HTable_t *HTable_Create(int nelements)
+{
+  gk_HTable_t *ht = gk_malloc(sizeof(gk_HTable_t), "HTable_Create: htable");
+
+  ht->nelements = nelements;
+  ht->harray    = gk_ikvmalloc(nelements, "HTable_Create: harray");
+
+  /* HTable_Reset stamps HTABLE_EMPTY on every slot and zeroes htsize */
+  HTable_Reset(ht);
+
+  return ht;
+}
+
+
+/******************************************************************************
+* This function marks every slot of the hash-table as empty and zeroes htsize
+*******************************************************************************/
+void HTable_Reset(gk_HTable_t *htable)
+{
+  int slot = 0;
+
+  while (slot < htable->nelements)
+    htable->harray[slot++].key = HTABLE_EMPTY;
+
+  htable->htsize = 0;
+}
+
+/******************************************************************************
+* This function resizes the hash-table, re-inserting only its live entries
+*******************************************************************************/
+void HTable_Resize(gk_HTable_t *htable, int nelements)
+{
+  int i, old_nelements;
+  gk_ikv_t *old_harray;
+
+  old_nelements = htable->nelements;
+  old_harray = htable->harray;
+
+  /* prepare the new slot array, with every slot marked empty */
+  htable->nelements = nelements;
+  htable->htsize = 0;
+  htable->harray = gk_ikvmalloc(nelements, "HTable_Resize: harray");
+  for (i=0; i<nelements; i++)
+    htable->harray[i].key = HTABLE_EMPTY;
+
+  /* reassign the live values; a deletion tombstone is not an entry and must not be counted */
+  for (i=0; i<old_nelements; i++)
+    if (old_harray[i].key != HTABLE_EMPTY && old_harray[i].key != HTABLE_DELETED)
+       HTable_Insert(htable, old_harray[i].key, old_harray[i].val);
+
+  /* remove old harray */
+  gk_free((void **)&old_harray, LTERM);
+}
+
+
+/******************************************************************************
+* This function stores a key-value pair in the first free slot found when
+* probing linearly from the key's hash position (wrapping around once)
+*******************************************************************************/
+void HTable_Insert(gk_HTable_t *htable, int key, int val)
+{
+  int pass, pos, start, stop, home;
+  gk_ikv_t *slot;
+
+  /* grow first, so that the probe below runs against the final slot array */
+  if (htable->htsize > htable->nelements/2)
+    HTable_Resize(htable, 2*htable->nelements);
+
+  home = HTable_HFunction(htable->nelements, key);
+
+  /* pass 0 probes [home, nelements); pass 1 wraps around to [0, home) */
+  for (pass=0; pass<2; pass++) {
+    start = (pass == 0 ? home : 0);
+    stop  = (pass == 0 ? htable->nelements : home);
+    for (pos=start; pos<stop; pos++) {
+      slot = htable->harray+pos;
+      if (slot->key != HTABLE_EMPTY && slot->key != HTABLE_DELETED)
+        continue;
+      /* empty slots and deletion tombstones are both reusable */
+      slot->key = key;
+      slot->val = val;
+      htable->htsize++;
+      return;
+    }
+  }
+}
+
+
+/******************************************************************************
+* This function marks the first slot that holds key as deleted
+*******************************************************************************/
+void HTable_Delete(gk_HTable_t *htable, int key)
+{
+  int pass, pos, start, stop, home;
+
+  home = HTable_HFunction(htable->nelements, key);
+
+  /* pass 0 probes [home, nelements); pass 1 wraps around to [0, home).
+     The probe does not stop at empty slots, so looking for a key that is
+     not stored visits every slot of the table. */
+  for (pass=0; pass<2; pass++) {
+    start = (pass == 0 ? home : 0);
+    stop  = (pass == 0 ? htable->nelements : home);
+
+    for (pos=start; pos<stop; pos++) {
+      if (htable->harray[pos].key != key)
+        continue;
+      /* leave a tombstone rather than HTABLE_EMPTY in the slot */
+      htable->harray[pos].key = HTABLE_DELETED;
+      htable->htsize--;
+      return;
+    }
+  }
+}
+
+
+/******************************************************************************
+* This function returns the value stored for key, or -1 if key is not found
+*******************************************************************************/
+int HTable_Search(gk_HTable_t *htable, int key)
+{
+  int pass, pos, start, stop, home;
+
+  home = HTable_HFunction(htable->nelements, key);
+
+  /* pass 0 probes [home, nelements); pass 1 wraps around to [0, home) */
+  for (pass=0; pass<2; pass++) {
+    start = (pass == 0 ? home : 0);
+    stop  = (pass == 0 ? htable->nelements : home);
+
+    for (pos=start; pos<stop; pos++) {
+      if (htable->harray[pos].key == key)
+        return htable->harray[pos].val;
+      if (htable->harray[pos].key == HTABLE_EMPTY)
+        return -1;   /* an empty slot ends the probe chain */
+    }
+  }
+
+  /* every slot was probed without meeting the key or an empty slot */
+  return -1;
+}
+
+
+/******************************************************************************
+* This function returns the next value stored under key. Call it with
+* type == HTABLE_FIRST to start a scan and with any other type to continue
+* it. It returns 1 and sets *r_val on a hit, and -1 once the scan is over.
+* The scan state is static, so only one scan can be in progress at a time.
+*******************************************************************************/
+int HTable_GetNext(gk_HTable_t *htable, int key, int *r_val, int type)
+{
+  int i;
+  static int next, nleft;  /* next slot to probe; # of slots not yet probed */
+
+  /* The previous code set first = last = hash(key), which left both of its
+     probe ranges empty, so no entry was ever returned. The scan now covers
+     the whole table once, starting at the key's hash position. */
+  if (type == HTABLE_FIRST) {
+    next  = HTable_HFunction(htable->nelements, key);
+    nleft = htable->nelements;
+  }
+
+  while (nleft > 0) {
+    i    = next;
+    next = (next+1 < htable->nelements ? next+1 : 0);   /* wrap around */
+    nleft--;
+
+    if (htable->harray[i].key == key) {
+      *r_val = htable->harray[i].val;
+      return 1;
+    }
+    if (htable->harray[i].key == HTABLE_EMPTY) {
+      nleft = 0;   /* end of the probe chain; later calls also return -1 */
+      break;
+    }
+  }
+
+  return -1;
+}
+
+
+/******************************************************************************
+* This function marks the slot holding key as deleted and returns the value
+* that was stored for it
+*******************************************************************************/
+int HTable_SearchAndDelete(gk_HTable_t *htable, int key)
+{
+  int pass, pos, start, stop, home;
+
+  home = HTable_HFunction(htable->nelements, key);
+
+  /* pass 0 probes [home, nelements); pass 1 wraps around to [0, home) */
+  for (pass=0; pass<2; pass++) {
+    start = (pass == 0 ? home : 0);
+    stop  = (pass == 0 ? htable->nelements : home);
+
+    for (pos=start; pos<stop; pos++) {
+      if (htable->harray[pos].key == key) {
+        /* only the key is overwritten, so val can still be read back */
+        htable->harray[pos].key = HTABLE_DELETED;
+        htable->htsize--;
+        return htable->harray[pos].val;
+      }
+
+      /* an empty slot means that the key is not stored in the table */
+      if (htable->harray[pos].key == HTABLE_EMPTY)
+        gk_errexit(SIGERR, "HTable_SearchAndDelete: Failed to find the key!\n");
+    }
+  }
+
+  return -1;
+
+}
+
+
+
+/******************************************************************************
+* This function frees the slot array and the hash-table structure itself
+*******************************************************************************/
+void HTable_Destroy(gk_HTable_t *htable)
+{
+  gk_free((void **)&htable->harray, &htable, LTERM);
+}
+
+
+/******************************************************************************
+* This is the hash-function: key modulo nelements, mapped into [0, nelements)
+*******************************************************************************/
+int HTable_HFunction(int nelements, int key)
+{
+  return (key%nelements < 0 ? key%nelements + nelements : key%nelements);
+}
diff --git a/io.c b/io.c
new file mode 100644
index 0000000..289b401
--- /dev/null
+++ b/io.c
@@ -0,0 +1,681 @@
+/*!
+\file  io.c
+\brief Various file I/O functions.
+
+This file contains various functions that perform I/O.
+
+\date Started 4/10/95
+\author George
+\version\verbatim $Id: io.c 18951 2015-08-08 20:10:46Z karypis $ \endverbatim
+*/
+
+#ifdef HAVE_GETLINE
+/* Get getline to be defined. */
+#define _GNU_SOURCE
+#include <stdio.h>
+#undef _GNU_SOURCE
+#endif
+
+#include <GKlib.h>
+
+/*************************************************************************
+* This function opens a file; on failure it calls perror() and errexit()
+**************************************************************************/
+FILE *gk_fopen(char *fname, char *mode, const char *msg)
+{
+  FILE *fp;
+  char errmsg[8192];
+
+  fp = fopen(fname, mode);
+  if (fp != NULL)
+    return fp;
+  /* bounded formatting: a long fname/msg must not overrun errmsg */
+  snprintf(errmsg, sizeof(errmsg), "file: %s, mode: %s, [%s]", fname, mode, msg);
+  perror(errmsg);
+  errexit("Failed on gk_fopen()\n");
+
+  return NULL;
+}
+
+
+/*************************************************************************
+* This function closes a file; the return value of fclose() is discarded
+**************************************************************************/
+void gk_fclose(FILE *fp)
+{
+  fclose(fp);
+}
+
+
+/*************************************************************************/
+/*! This function wraps read() and keeps issuing requests until 'count'
+    bytes have been read or a read() call returns 0 (EOF). It returns the
+    number of bytes read, or -1 as soon as a read() call fails.
+*/
+/*************************************************************************/
+ssize_t gk_read(int fd, void *vbuf, size_t count)
+{
+  ssize_t got, left=count;
+  char *cursor = (char *)vbuf;
+
+  do {
+    got = read(fd, cursor, left);
+    if (got == -1)
+      return -1;
+    left   -= got;
+    cursor += got;
+  } while (got > 0 && left > 0);
+
+  return count-left;
+}
+
+
+/*************************************************************************/
+/*! This function wraps write() and keeps issuing write requests until all
+    'count' bytes are written; returns 'count', or -1 if a write() fails. */
+/*************************************************************************/
+ssize_t gk_write(int fd, void *vbuf, size_t count)
+{
+  ssize_t sent, left=count;
+  char *cursor = (char *)vbuf;
+
+  do {
+    sent = write(fd, cursor, left);
+    if (sent == -1)
+      return -1;
+    left   -= sent;
+    cursor += sent;
+  } while (left > 0);
+
+  return count;
+}
+
+
+/*************************************************************************/
+/*! This function is the GKlib implementation of glibc's getline()
+    function.
+    \returns -1 if the EOF has been reached, otherwise it returns the 
+             number of bytes read.
+*/
+/*************************************************************************/
+ssize_t gk_getline(char **lineptr, size_t *n, FILE *stream)
+{
+#ifdef HAVE_GETLINE
+  return getline(lineptr, n, stream);
+#else
+  size_t i;
+  int ch;
+
+  if (feof(stream))
+    return -1;  
+
+  /* Initial memory allocation if *lineptr is NULL or its size is given as 0 */
+  if (*lineptr == NULL || *n == 0) {
+    *n = 1024;
+    *lineptr = gk_malloc((*n)*sizeof(char), "gk_getline: lineptr");
+  }
+
+  /* get into the main loop */
+  i = 0;
+  while ((ch = getc(stream)) != EOF) {
+    (*lineptr)[i++] = (char)ch;
+
+    /* grow once only the slot for '\0' is left; >= rather than == so that
+       a caller-supplied buffer with *n == 1 is grown as well */
+    if (i+1 >= *n) { 
+      *n = 2*(*n);
+      *lineptr = gk_realloc(*lineptr, (*n)*sizeof(char), "gk_getline: lineptr");
+    }
+    if (ch == '\n')
+      break;
+  }
+  (*lineptr)[i] = '\0';
+
+  return (i == 0 ? -1 : i);
+#endif
+}
+
+
+/*************************************************************************/
+/*! This function loads a text file into an array of strings, one string
+    per line; each line is passed through gk_strtprune(line, "\n\r").
+    \param fname is the name of the file
+    \param r_nlines receives the number of lines that were read. If it is
+           NULL, this information is not returned.
+    \returns the array of lines, or NULL if gk_getfilestats() found none.
+*/
+/*************************************************************************/
+char **gk_readfile(char *fname, size_t *r_nlines)
+{
+  size_t buflen, count=0;
+  char *buf=NULL, **result=NULL;
+  FILE *fp;
+
+  gk_getfilestats(fname, &count, NULL, NULL, NULL);
+  if (count > 0) {
+    result = (char **)gk_malloc(count*sizeof(char *), "gk_readfile: lines");
+
+    fp = gk_fopen(fname, "r", "gk_readfile");
+    for (count=0; gk_getline(&buf, &buflen, fp) != -1; count++) {
+      gk_strtprune(buf, "\n\r");
+      result[count] = gk_strdup(buf);
+    }
+    gk_fclose(fp);
+  }
+
+  gk_free((void **)&buf, LTERM);
+
+  if (r_nlines != NULL)
+    *r_nlines  = count;
+
+  return result;
+}
+
+
+/*************************************************************************/
+/*! This function reads the contents of a file and returns it in the
+    form of an array of int32_t, one value per line.
+    \param fname is the name of the file
+    \param r_nlines is the number of lines in the file. If it is NULL,
+           this information is not returned.
+*/
+/*************************************************************************/
+int32_t *gk_i32readfile(char *fname, size_t *r_nlines)
+{
+  size_t lnlen=0, nlines=0;
+  char *line=NULL;
+  int32_t *array=NULL;
+  FILE *fpin;
+
+  gk_getfilestats(fname, &nlines, NULL, NULL, NULL);
+  if (nlines > 0) {
+    array = gk_i32malloc(nlines, "gk_i32readfile: array");
+
+    fpin = gk_fopen(fname, "r", "gk_i32readfile");
+    nlines = 0;
+    while (gk_getline(&line, &lnlen, fpin) != -1) {
+      /* a line with no parsable number yields 0 rather than garbage */
+      if (sscanf(line, "%"SCNd32, &array[nlines]) != 1)
+        array[nlines] = 0;
+      nlines++;
+    }
+    gk_fclose(fpin);
+  }
+
+  gk_free((void **)&line, LTERM);
+
+  if (r_nlines != NULL)
+    *r_nlines  = nlines;
+
+  return array;
+}
+
+/*************************************************************************/
+/*! This function reads the contents of a file and returns it in the
+    form of an array of int64_t, one value per line.
+    \param fname is the name of the file
+    \param r_nlines is the number of lines in the file. If it is NULL,
+           this information is not returned.
+*/
+/*************************************************************************/
+int64_t *gk_i64readfile(char *fname, size_t *r_nlines)
+{
+  size_t lnlen=0, nlines=0;
+  char *line=NULL;
+  int64_t *array=NULL;
+  FILE *fpin;
+
+  gk_getfilestats(fname, &nlines, NULL, NULL, NULL);
+  if (nlines > 0) {
+    array = gk_i64malloc(nlines, "gk_i64readfile: array");
+
+    fpin = gk_fopen(fname, "r", "gk_i64readfile");
+    nlines = 0;
+    while (gk_getline(&line, &lnlen, fpin) != -1) {
+      /* a line with no parsable number yields 0 rather than garbage */
+      if (sscanf(line, "%"SCNd64, &array[nlines]) != 1)
+        array[nlines] = 0;
+      nlines++;
+    }
+    gk_fclose(fpin);
+  }
+
+  gk_free((void **)&line, LTERM);
+
+  if (r_nlines != NULL)
+    *r_nlines  = nlines;
+
+  return array;
+}
+
+/*************************************************************************/
+/*! This function reads the contents of a file and returns it in the
+    form of an array of ssize_t, one value per line.
+    \param fname is the name of the file
+    \param r_nlines is the number of lines in the file. If it is NULL,
+           this information is not returned.
+*/
+/*************************************************************************/
+ssize_t *gk_zreadfile(char *fname, size_t *r_nlines)
+{
+  size_t lnlen=0, nlines=0;
+  char *line=NULL;
+  ssize_t *array=NULL;
+  FILE *fpin;
+
+  gk_getfilestats(fname, &nlines, NULL, NULL, NULL);
+  if (nlines > 0) {
+    array = gk_zmalloc(nlines, "gk_zreadfile: array");
+
+    fpin = gk_fopen(fname, "r", "gk_zreadfile");
+    nlines = 0;
+    while (gk_getline(&line, &lnlen, fpin) != -1) {
+      /* a line with no parsable number yields 0 rather than garbage */
+      if (sscanf(line, "%zd", &array[nlines]) != 1)
+        array[nlines] = 0;
+      nlines++;
+    }
+    gk_fclose(fpin);
+  }
+
+  gk_free((void **)&line, LTERM);
+
+  if (r_nlines != NULL)
+    *r_nlines  = nlines;
+
+  return array;
+}
+
+/*************************************************************************/
+/*! This function reads the contents of a binary file and returns it in the
+    form of an array of char.
+    \param fname is the name of the file
+    \param r_nelmnts receives the number of elements that were read. It is
+           written unconditionally, so it must not be NULL.
+*/
+/*************************************************************************/
+char *gk_creadfilebin(char *fname, size_t *r_nelmnts)
+{
+  size_t nelmnts;
+  ssize_t fsize;
+  char *array=NULL;
+  FILE *fpin;
+
+  *r_nelmnts = 0;
+
+  fsize = gk_getfsize(fname);
+  if (fsize == -1) {
+    gk_errexit(SIGERR, "Failed to fstat(%s).\n", fname);
+    return NULL;
+  }
+
+  nelmnts = fsize;
+  array = gk_cmalloc(nelmnts, "gk_creadfilebin: array");
+
+  fpin = gk_fopen(fname, "rb", "gk_creadfilebin");
+  if (fread(array, sizeof(char), nelmnts, fpin) != nelmnts) {
+    gk_errexit(SIGERR, "Failed to read the number of words requested. %zu\n", nelmnts);
+    gk_fclose(fpin);  /* do not leak the stream if gk_errexit() returns */
+    gk_free((void **)&array, LTERM);
+    return NULL;
+  }
+  gk_fclose(fpin);
+
+  *r_nelmnts = nelmnts;
+
+  return array;
+}
+
+/*************************************************************************/
+/*! This function writes the contents of an array into a binary file.
+    \param fname is the name of the file
+    \param n the number of elements in the array.
+    \param a the array to be written out.
+    \returns the number of elements written, as reported by fwrite().
+*/
+/*************************************************************************/
+size_t gk_cwritefilebin(char *fname, size_t n, char *a)
+{
+  size_t fsize;
+  FILE *fp;
+
+  fp = gk_fopen(fname, "wb", "gk_cwritefilebin");
+
+  fsize = fwrite(a, sizeof(char), n, fp);
+  gk_fclose(fp);
+
+  return fsize;
+}
+
+/*************************************************************************/
+/*! This function reads the contents of a binary file and returns it in the
+    form of an array of int32_t.
+    \param fname is the name of the file
+    \param r_nelmnts receives the number of elements that were read. It is
+           written unconditionally, so it must not be NULL.
+*/
+/*************************************************************************/
+int32_t *gk_i32readfilebin(char *fname, size_t *r_nelmnts)
+{
+  size_t nelmnts;
+  ssize_t fsize;
+  int32_t *array=NULL;
+  FILE *fpin;
+
+  *r_nelmnts = 0;
+
+  fsize = gk_getfsize(fname);
+
+  if (fsize == -1) {
+    gk_errexit(SIGERR, "Failed to fstat(%s).\n", fname);
+    return NULL;
+  }
+
+  if (fsize%sizeof(int32_t) != 0) {
+    gk_errexit(SIGERR, "The size [%zd] of the file [%s] is not in multiples of sizeof(int32_t).\n", fsize, fname);
+    return NULL;
+  }
+
+  nelmnts = fsize/sizeof(int32_t);
+  array = gk_i32malloc(nelmnts, "gk_i32readfilebin: array");
+
+  fpin = gk_fopen(fname, "rb", "gk_i32readfilebin");
+  if (fread(array, sizeof(int32_t), nelmnts, fpin) != nelmnts) {
+    gk_errexit(SIGERR, "Failed to read the number of words requested. %zu\n", nelmnts);
+    gk_fclose(fpin);  /* do not leak the stream if gk_errexit() returns */
+    gk_free((void **)&array, LTERM);
+    return NULL;
+  }
+  gk_fclose(fpin);
+
+  *r_nelmnts = nelmnts;
+
+  return array;
+}
+
+/*************************************************************************/
+/*! This function writes the contents of an array into a binary file.
+    \param fname is the name of the file
+    \param n the number of elements in the array.
+    \param a the array to be written out.
+    \returns the number of elements written, as reported by fwrite().
+*/
+/*************************************************************************/
+size_t gk_i32writefilebin(char *fname, size_t n, int32_t *a)
+{
+  size_t fsize;
+  FILE *fp;
+
+  fp = gk_fopen(fname, "wb", "gk_i32writefilebin");
+
+  fsize = fwrite(a, sizeof(int32_t), n, fp);
+  gk_fclose(fp);
+
+  return fsize;
+}
+
+/*************************************************************************/
+/*! This function reads the contents of a binary file and returns it in the
+    form of an array of int64_t.
+    \param fname is the name of the file
+    \param r_nelmnts receives the number of elements that were read. It is
+           written unconditionally, so it must not be NULL.
+*/
+/*************************************************************************/
+int64_t *gk_i64readfilebin(char *fname, size_t *r_nelmnts)
+{
+  size_t nelmnts;
+  ssize_t fsize;
+  int64_t *array=NULL;
+  FILE *fpin;
+
+  *r_nelmnts = 0;
+
+  fsize = gk_getfsize(fname);
+
+  if (fsize == -1) {
+    gk_errexit(SIGERR, "Failed to fstat(%s).\n", fname);
+    return NULL;
+  }
+
+  if (fsize%sizeof(int64_t) != 0) {
+    gk_errexit(SIGERR, "The size of the file is not in multiples of sizeof(int64_t).\n");
+    return NULL;
+  }
+
+  nelmnts = fsize/sizeof(int64_t);
+  array = gk_i64malloc(nelmnts, "gk_i64readfilebin: array");
+
+  fpin = gk_fopen(fname, "rb", "gk_i64readfilebin");
+  if (fread(array, sizeof(int64_t), nelmnts, fpin) != nelmnts) {
+    gk_errexit(SIGERR, "Failed to read the number of words requested. %zu\n", nelmnts);
+    gk_fclose(fpin);  /* do not leak the stream if gk_errexit() returns */
+    gk_free((void **)&array, LTERM);
+    return NULL;
+  }
+  gk_fclose(fpin);
+
+  *r_nelmnts = nelmnts;
+
+  return array;
+}
+
+/*************************************************************************/
+/*! This function writes the contents of an array into a binary file.
+    \param fname is the name of the file
+    \param n the number of elements in the array.
+    \param a the array to be written out.
+    \returns the number of elements written, as reported by fwrite().
+*/
+/*************************************************************************/
+size_t gk_i64writefilebin(char *fname, size_t n, int64_t *a)
+{
+  size_t fsize;
+  FILE *fp;
+
+  fp = gk_fopen(fname, "wb", "gk_i64writefilebin");
+
+  fsize = fwrite(a, sizeof(int64_t), n, fp);
+  gk_fclose(fp);
+
+  return fsize;
+}
+
+/*************************************************************************/
+/*! This function reads the contents of a binary file and returns it in the
+    form of an array of ssize_t.
+    \param fname is the name of the file
+    \param r_nelmnts receives the number of elements that were read. It is
+           written unconditionally, so it must not be NULL.
+*/
+/*************************************************************************/
+ssize_t *gk_zreadfilebin(char *fname, size_t *r_nelmnts)
+{
+  size_t nelmnts;
+  ssize_t fsize;
+  ssize_t *array=NULL;
+  FILE *fpin;
+
+  *r_nelmnts = 0;
+
+  fsize = gk_getfsize(fname);
+
+  if (fsize == -1) {
+    gk_errexit(SIGERR, "Failed to fstat(%s).\n", fname);
+    return NULL;
+  }
+
+  if (fsize%sizeof(ssize_t) != 0) {
+    gk_errexit(SIGERR, "The size of the file is not in multiples of sizeof(ssize_t).\n");
+    return NULL;
+  }
+
+  nelmnts = fsize/sizeof(ssize_t);
+  array = gk_zmalloc(nelmnts, "gk_zreadfilebin: array");
+
+  fpin = gk_fopen(fname, "rb", "gk_zreadfilebin");
+  if (fread(array, sizeof(ssize_t), nelmnts, fpin) != nelmnts) {
+    gk_errexit(SIGERR, "Failed to read the number of words requested. %zu\n", nelmnts);
+    gk_fclose(fpin);  /* do not leak the stream if gk_errexit() returns */
+    gk_free((void **)&array, LTERM);
+    return NULL;
+  }
+  gk_fclose(fpin);
+
+  *r_nelmnts = nelmnts;
+
+  return array;
+}
+
+/*************************************************************************/
+/*! This function writes the contents of an array into a binary file.
+    \param fname is the name of the file
+    \param n the number of elements in the array.
+    \param a the array to be written out.
+    \returns the number of elements written, as reported by fwrite().
+*/
+/*************************************************************************/
+size_t gk_zwritefilebin(char *fname, size_t n, ssize_t *a)
+{
+  size_t fsize;
+  FILE *fp;
+
+  fp = gk_fopen(fname, "wb", "gk_zwritefilebin");
+
+  fsize = fwrite(a, sizeof(ssize_t), n, fp);
+  gk_fclose(fp);
+
+  return fsize;
+}
+
+/*************************************************************************/
+/*! This function reads the contents of a binary file and returns it in the
+    form of an array of float.
+    \param fname is the name of the file
+    \param r_nelmnts receives the number of elements that were read. It is
+           written unconditionally, so it must not be NULL.
+*/
+/*************************************************************************/
+float *gk_freadfilebin(char *fname, size_t *r_nelmnts)
+{
+  size_t nelmnts;
+  ssize_t fsize;
+  float *array=NULL;
+  FILE *fpin;
+
+  *r_nelmnts = 0;
+
+  fsize = gk_getfsize(fname);
+
+  if (fsize == -1) {
+    gk_errexit(SIGERR, "Failed to fstat(%s).\n", fname);
+    return NULL;
+  }
+
+  if (fsize%sizeof(float) != 0) {
+    gk_errexit(SIGERR, "The size of the file is not in multiples of sizeof(float).\n");
+    return NULL;
+  }
+
+  nelmnts = fsize/sizeof(float);
+  array = gk_fmalloc(nelmnts, "gk_freadfilebin: array");
+
+  fpin = gk_fopen(fname, "rb", "gk_freadfilebin");
+  if (fread(array, sizeof(float), nelmnts, fpin) != nelmnts) {
+    gk_errexit(SIGERR, "Failed to read the number of words requested. %zu\n", nelmnts);
+    gk_fclose(fpin);  /* do not leak the stream if gk_errexit() returns */
+    gk_free((void **)&array, LTERM);
+    return NULL;
+  }
+  gk_fclose(fpin);
+
+  *r_nelmnts = nelmnts;
+
+  return array;
+}
+
+/*************************************************************************/
+/*! This function writes the contents of an array into a binary file.
+    \param fname is the name of the file
+    \param n the number of elements in the array.
+    \param a the array to be written out.
+    \returns the number of elements written, as reported by fwrite().
+*/
+/*************************************************************************/
+size_t gk_fwritefilebin(char *fname, size_t n, float *a)
+{
+  size_t nwritten;
+  FILE *out = gk_fopen(fname, "wb", "gk_fwritefilebin");
+
+  /* a short count from fwrite() is handed back to the caller unchanged */
+  nwritten = fwrite(a, sizeof(float), n, out);
+
+  gk_fclose(out);
+
+  return nwritten;
+}
+
+/*************************************************************************/
+/*! This function reads the contents of a binary file and returns it in the
+    form of an array of double.
+    \param fname is the name of the file
+    \param r_nelmnts receives the number of elements that were read. It is
+           written unconditionally, so it must not be NULL.
+*/
+/*************************************************************************/
+double *gk_dreadfilebin(char *fname, size_t *r_nelmnts)
+{
+  size_t nelmnts;
+  ssize_t fsize;
+  double *array=NULL;
+  FILE *fpin;
+
+  *r_nelmnts = 0;
+
+  fsize = gk_getfsize(fname);
+
+  if (fsize == -1) {
+    gk_errexit(SIGERR, "Failed to fstat(%s).\n", fname);
+    return NULL;
+  }
+
+  if (fsize%sizeof(double) != 0) {
+    gk_errexit(SIGERR, "The size of the file is not in multiples of sizeof(double).\n");
+    return NULL;
+  }
+
+  nelmnts = fsize/sizeof(double);
+  array = gk_dmalloc(nelmnts, "gk_dreadfilebin: array");
+
+  fpin = gk_fopen(fname, "rb", "gk_dreadfilebin");
+  if (fread(array, sizeof(double), nelmnts, fpin) != nelmnts) {
+    gk_errexit(SIGERR, "Failed to read the number of words requested. %zu\n", nelmnts);
+    gk_fclose(fpin);  /* do not leak the stream if gk_errexit() returns */
+    gk_free((void **)&array, LTERM);
+    return NULL;
+  }
+  gk_fclose(fpin);
+
+  *r_nelmnts = nelmnts;
+
+  return array;
+}
+
+/*************************************************************************/
+/*! This function writes the contents of an array into a binary file.
+    \param fname is the name of the file
+    \param n the number of elements in the array.
+    \param a the array to be written out.
+    \returns the number of elements written, as reported by fwrite().
+*/
+/*************************************************************************/
+size_t gk_dwritefilebin(char *fname, size_t n, double *a)
+{
+  size_t fsize;
+  FILE *fp;
+
+  fp = gk_fopen(fname, "wb", "gk_dwritefilebin");
+
+  fsize = fwrite(a, sizeof(double), n, fp);
+  gk_fclose(fp);
+
+  return fsize;
+}
+
diff --git a/itemsets.c b/itemsets.c
new file mode 100644
index 0000000..beb58ae
--- /dev/null
+++ b/itemsets.c
@@ -0,0 +1,210 @@
+/*!
+ * \file
+ * \brief Frequent/Closed itemset discovery routines 
+ *
+ * This file contains the code for finding frequent/closed itemsets. These routines
+ * are implemented using a call-back mechanism to deal with the discovered itemsets.
+ *
+ * \date 6/13/2008
+ * \author George Karypis
+ * \version\verbatim $Id: itemsets.c 19240 2015-10-22 12:41:19Z karypis $ \endverbatim
+ */
+
+#include <GKlib.h>
+
+/*-------------------------------------------------------------*/
+/*! Data structures for use within this module */
+/*-------------------------------------------------------------*/
+typedef struct {
+  int minfreq;  /* the minimum frequency of a pattern */
+  int maxfreq;  /* the maximum frequency of a pattern */
+  int minlen;   /* the minimum length of the requested pattern */
+  int maxlen;   /* the maximum length of the requested pattern */
+  int tnitems;  /* the initial range of the item space */
+
+  /* the call-back function */
+  void (*callback)(void *stateptr, int nitems, int *itemids, int ntrans, int *transids); 
+  void *stateptr;   /* the user-supplied pointer to pass to the callback */
+
+  /* workspace variables */
+  int *rmarker;     /* per-row 0/1 flags; 1 marks a row kept by the projection in progress */
+  gk_ikv_t *cand;   /* candidate columns: key = # of kept rows, val = column index */
+} isparams_t;
+
+
+/*-------------------------------------------------------------*/
+/*! Prototypes for this module */
+/*-------------------------------------------------------------*/
+void itemsets_find_frequent_itemsets(isparams_t *params, gk_csr_t *mat, 
+         int preflen, int *prefix);
+gk_csr_t *itemsets_project_matrix(isparams_t *param, gk_csr_t *mat, int cid);
+
+
+
+/*************************************************************************/
+/*! The entry point of the frequent itemset discovery code */
+/*************************************************************************/
+void gk_find_frequent_itemsets(int ntrans, ssize_t *tranptr, int *tranind, 
+        int minfreq, int maxfreq, int minlen, int maxlen, 
+        void (*process_itemset)(void *stateptr, int nitems, int *itemids, 
+                                int ntrans, int *transids),
+        void *stateptr)
+{
+  gk_csr_t *mat, *pmat;
+  isparams_t params;
+  int *pattern;
+
+  /* No transactions or no items: nothing to mine, and tranind has no maximum */
+  if (ntrans <= 0 || tranptr[ntrans] <= 0) return;
+
+  /* Create the matrix; ncols is the largest item id found in tranind, plus one */
+  mat = gk_csr_Create();
+  mat->nrows  = ntrans;
+  mat->ncols  = tranind[gk_iargmax(tranptr[ntrans], tranind, 1)]+1;
+  mat->rowptr = gk_zcopy(ntrans+1, tranptr, gk_zmalloc(ntrans+1, "gk_find_frequent_itemsets: mat.rowptr"));
+  mat->rowind = gk_icopy(tranptr[ntrans], tranind, gk_imalloc(tranptr[ntrans], "gk_find_frequent_itemsets: mat.rowind"));
+  mat->colids = gk_iincset(mat->ncols, 0, gk_imalloc(mat->ncols, "gk_find_frequent_itemsets: mat.colids"));
+
+  /* Setup the parameters */
+  params.minfreq  = minfreq;
+  params.maxfreq  = (maxfreq == -1 ? mat->nrows : maxfreq);  /* -1: up to all the rows */
+  params.minlen   = minlen;
+  params.maxlen   = (maxlen == -1 ? mat->ncols : maxlen);    /* -1: up to all the columns */
+  params.tnitems  = mat->ncols;
+  params.callback = process_itemset;
+  params.stateptr = stateptr;
+  params.rmarker  = gk_ismalloc(mat->nrows, 0, "gk_find_frequent_itemsets: rmarker");
+  params.cand     = gk_ikvmalloc(mat->ncols, "gk_find_frequent_itemsets: cand");
+
+  /* Perform the initial projection */
+  gk_csr_CreateIndex(mat, GK_CSR_COL);
+  pmat = itemsets_project_matrix(&params, mat, -1);
+  gk_csr_Free(&mat);
+
+  pattern = gk_imalloc(pmat->ncols, "gk_find_frequent_itemsets: pattern");
+  itemsets_find_frequent_itemsets(&params, pmat, 0, pattern); 
+  gk_csr_Free(&pmat);
+  gk_free((void **)&pattern, &params.rmarker, &params.cand, LTERM);
+}
+
+
+
+/*************************************************************************/
+/*! The recursive routine for DFS-based frequent pattern discovery */
+/*************************************************************************/
+void itemsets_find_frequent_itemsets(isparams_t *params, gk_csr_t *mat, 
+         int preflen, int *prefix)
+{
+  ssize_t col;
+  int len = preflen+1;   /* pattern length once a column is appended */
+  gk_csr_t *proj;
+
+  /* Extend the current prefix with each column of the matrix in turn */
+  for (col=0; col<mat->ncols; col++) {
+    prefix[preflen] = mat->colids[col];
+
+    if (len >= params->minlen)   /* long enough to be reported */
+      (*params->callback)(params->stateptr, len, prefix, 
+           mat->colptr[col+1]-mat->colptr[col], mat->colind+mat->colptr[col]);
+
+    if (len >= params->maxlen)   /* already at the maximum length */
+      continue;
+    proj = itemsets_project_matrix(params, mat, col);
+    itemsets_find_frequent_itemsets(params, proj, len, prefix);
+    gk_csr_Free(&proj);
+  }
+}
+
+
+/******************************************************************************/
+/*! This function projects a matrix w.r.t. column cid (cid == -1 keeps all
+    rows). Only the columns after cid are considered. It:
+    - Determines the remaining length of each column and keeps those whose
+      length lies in [minfreq, maxfreq].
+    - Sorts the kept columns in increasing length.
+    - Creates a column-based matrix with that column ordering.
+ */
+/*******************************************************************************/
+gk_csr_t *itemsets_project_matrix(isparams_t *params, gk_csr_t *mat, int cid)
+{
+  ssize_t i, j, k, ii, pnnz;
+  int nrows, ncols, pnrows, pncols;
+  ssize_t *colptr, *pcolptr;
+  int *colind, *colids, *pcolind, *pcolids, *rmarker;
+  gk_csr_t *pmat;
+  gk_ikv_t *cand;
+
+  nrows  = mat->nrows;
+  ncols  = mat->ncols;
+  colptr = mat->colptr;
+  colind = mat->colind;
+  colids = mat->colids;
+
+  rmarker = params->rmarker;   /* shared workspace; it is cleared again before returning */
+  cand    = params->cand;      /* shared workspace; it is only used within this call */
+
+
+  /* Allocate space for the projected matrix based on what you know thus far */
+  pmat = gk_csr_Create();
+  pmat->nrows  = pnrows = (cid == -1 ? nrows : colptr[cid+1]-colptr[cid]);
+
+
+  /* Mark the rows that will be kept and determine the prowids */
+  if (cid == -1) { /* Initial projection */
+    gk_iset(nrows, 1, rmarker);
+  }
+  else { /* The other projections */
+    for (i=colptr[cid]; i<colptr[cid+1]; i++) 
+      rmarker[colind[i]] = 1;
+  }
+
+
+  /* Determine the length of each column that will be left in the projected matrix */
+  for (pncols=0, pnnz=0, i=cid+1; i<ncols; i++) {
+    for (k=0, j=colptr[i]; j<colptr[i+1]; j++) {
+      k += rmarker[colind[j]];   /* counts the entries of column i that fall in kept rows */
+    }
+    if (k >= params->minfreq && k <= params->maxfreq) {
+      cand[pncols].val   = i;
+      cand[pncols++].key = k;
+      pnnz += k;
+    }
+  }
+
+  /* Sort the columns in increasing order */
+  gk_ikvsorti(pncols, cand);
+
+
+  /* Allocate space for the remaining fields of the projected matrix */
+  pmat->ncols  = pncols;
+  pmat->colids = pcolids = gk_imalloc(pncols, "itemsets_project_matrix: pcolids");
+  pmat->colptr = pcolptr = gk_zmalloc(pncols+1, "itemsets_project_matrix: pcolptr");
+  pmat->colind = pcolind = gk_imalloc(pnnz, "itemsets_project_matrix: pcolind");
+
+
+  /* Populate the projected matrix */
+  pcolptr[0] = 0;
+  for (pnnz=0, ii=0; ii<pncols; ii++) {
+    i = cand[ii].val;
+    for (j=colptr[i]; j<colptr[i+1]; j++) {
+      if (rmarker[colind[j]]) 
+        pcolind[pnnz++] = colind[j];   /* row ids are copied as they are, not renumbered */
+    }
+
+    pcolids[ii] = colids[i];
+    pcolptr[ii+1] = pnnz;
+  }
+
+
+  /* Reset the rmarker array */
+  if (cid == -1) { /* Initial projection */
+    gk_iset(nrows, 0, rmarker);
+  }
+  else { /* The other projections */
+    for (i=colptr[cid]; i<colptr[cid+1]; i++) 
+      rmarker[colind[i]] = 0;
+  }
+
+
+  return pmat;
+}
diff --git a/mcore.c b/mcore.c
new file mode 100644
index 0000000..6442e03
--- /dev/null
+++ b/mcore.c
@@ -0,0 +1,393 @@
+/*!
+\file 
+\brief Functions dealing with creating and allocating mcores
+
+\date Started 5/30/11
+\author George
+\author Copyright 1997-2011, Regents of the University of Minnesota 
+\version $Id: mcore.c 13953 2013-03-30 16:20:07Z karypis $
+*/
+
+#include <GKlib.h>
+
+
+/*************************************************************************/
+/*! Creates an mcore whose core buffer holds coresize bytes (no buffer is
+    allocated when coresize is 0) and whose stack of malloc ops is empty. */
+/*************************************************************************/
+gk_mcore_t *gk_mcoreCreate(size_t coresize)
+{
+  gk_mcore_t *mc = (gk_mcore_t *)gk_malloc(sizeof(gk_mcore_t), "gk_mcoreCreate: mcore");
+
+  memset(mc, 0, sizeof(gk_mcore_t)); /* every counter starts at 0 */
+  mc->corecpos = 0;
+  mc->coresize = coresize;
+  if (coresize == 0)
+    mc->core = NULL;
+  else
+    mc->core = gk_malloc(coresize, "gk_mcoreCreate: core");
+
+  /* the stack of malloc ops starts with room for 2048 entries */
+  mc->cmop  = 0;
+  mc->nmops = 2048;
+  mc->mops  = (gk_mop_t *)gk_malloc(mc->nmops*sizeof(gk_mop_t), "gk_mcoreCreate: mcore->mops");
+
+  return mc;
+}
+
+
+/*************************************************************************/
+/*! Creates the mcore used for gkmcore. It is built with plain malloc and
+    gets no core buffer; NULL is returned if either allocation fails. */
+/*************************************************************************/
+gk_mcore_t *gk_gkmcoreCreate()
+{
+  gk_mcore_t *mc = (gk_mcore_t *)malloc(sizeof(gk_mcore_t));
+
+  if (mc == NULL)
+    return NULL;
+  memset(mc, 0, sizeof(gk_mcore_t));
+
+  /* the stack of malloc ops starts with room for 2048 entries */
+  mc->cmop  = 0;
+  mc->nmops = 2048;
+  mc->mops  = (gk_mop_t *)malloc(mc->nmops*sizeof(gk_mop_t));
+  if (mc->mops != NULL)
+    return mc;
+  /* no stack of mops could be had; release the descriptor and give up */
+  free(mc);
+  return NULL;
+}
+
+
+/*************************************************************************/
+/*! Releases an mcore, optionally printing its usage statistics first, and
+    sets the caller's pointer to NULL. A NULL mcore is silently ignored. */
+/*************************************************************************/
+void gk_mcoreDestroy(gk_mcore_t **r_mcore, int showstats)
+{
+  gk_mcore_t *mc = *r_mcore;
+
+  if (mc == NULL)
+    return;
+
+  if (showstats)
+    printf("\n gk_mcore statistics\n" 
+           "           coresize: %12zu         nmops: %12zu  cmop: %6zu\n"
+           "        num_callocs: %12zu   num_hallocs: %12zu\n"
+           "       size_callocs: %12zu  size_hallocs: %12zu\n"
+           "        cur_callocs: %12zu   cur_hallocs: %12zu\n"
+           "        max_callocs: %12zu   max_hallocs: %12zu\n",
+           mc->coresize, mc->nmops, mc->cmop,
+           mc->num_callocs,  mc->num_hallocs,
+           mc->size_callocs, mc->size_hallocs,
+           mc->cur_callocs,  mc->cur_hallocs,
+           mc->max_callocs,  mc->max_hallocs);
+
+  /* warn when bytes or mops are still outstanding */
+  if (!(mc->cur_callocs == 0 && mc->cur_hallocs == 0 && mc->cmop == 0)) {
+    printf("***Warning: mcore memory was not fully freed when destroyed.\n"
+           " cur_callocs: %6zu  cur_hallocs: %6zu cmop: %6zu\n",
+           mc->cur_callocs,  mc->cur_hallocs, mc->cmop);
+  }
+
+  /* the descriptor goes last; gk_free() also resets *r_mcore to NULL */
+  gk_free((void **)&mc->core, &mc->mops, r_mcore, LTERM);
+}
+
+
+/*************************************************************************/
+/*! Releases the gkmcore flavor of an mcore, optionally printing its usage
+    statistics first, and sets the caller's pointer to NULL. */
+/*************************************************************************/
+void gk_gkmcoreDestroy(gk_mcore_t **r_mcore, int showstats)
+{
+  gk_mcore_t *mc = *r_mcore;
+
+  if (mc == NULL)
+    return;
+
+  if (showstats)
+    printf("\n gk_mcore statistics\n" 
+           "         nmops: %12zu  cmop: %6zu\n"
+           "   num_hallocs: %12zu\n"
+           "  size_hallocs: %12zu\n"
+           "   cur_hallocs: %12zu\n"
+           "   max_hallocs: %12zu\n",
+           mc->nmops, mc->cmop,
+           mc->num_hallocs,
+           mc->size_hallocs,
+           mc->cur_hallocs,
+           mc->max_hallocs);
+
+  /* warn when bytes or mops are still outstanding */
+  if (!(mc->cur_hallocs == 0 && mc->cmop == 0)) {
+    printf("***Warning: mcore memory was not fully freed when destroyed.\n"
+           " cur_hallocs: %6zu cmop: %6zu\n",
+           mc->cur_hallocs, mc->cmop);
+  }
+  /* plain free(): this flavor is not handled through gk_free() */
+  free(mc->mops);
+  free(mc);
+  *r_mcore = NULL;
+}
+
+
+/*************************************************************************/
+/*! Hands out nbytes (rounded up to a multiple of 8) from the core when the
+    request fits there and from the heap otherwise; the mop is recorded. */
+/*************************************************************************/
+void *gk_mcoreMalloc(gk_mcore_t *mcore, size_t nbytes)
+{
+  void *chunk;
+  size_t rem = nbytes%8;
+
+  /* pad the request so that the pointers stay 8-byte aligned */
+  if (rem != 0)
+    nbytes += 8 - rem;
+
+  /* the test is strict: a request that would fill the core exactly is
+     served from the heap as well */
+  if (mcore->corecpos + nbytes >= mcore->coresize) {
+    /* service this request from the heap */
+    chunk = gk_malloc(nbytes, "gk_mcoremalloc: ptr");
+
+    gk_mcoreAdd(mcore, GK_MOPT_HEAP, nbytes, chunk);
+
+    return chunk;
+  }
+
+  /* service this request from the core, at its first free byte */
+  chunk = ((char *)mcore->core)+mcore->corecpos;
+  mcore->corecpos += nbytes;
+
+  gk_mcoreAdd(mcore, GK_MOPT_CORE, nbytes, chunk);
+
+  return chunk;
+}
+
+
+/*************************************************************************/
+/*! This function adds a GK_MOPT_MARK entry (0 bytes, NULL pointer) to the
+    stack of malloc ops, to be used subsequently for freeing purposes.
+ */
+/*************************************************************************/
+void gk_mcorePush(gk_mcore_t *mcore)
+{
+  gk_mcoreAdd(mcore, GK_MOPT_MARK, 0, NULL);
+  /* printf("MCPPUSH:   %zu\n", mcore->cmop-1); */
+}
+
+
+/*************************************************************************/
+/*! This function adds a GK_MOPT_MARK entry (0 bytes, NULL pointer) to the
+    stack of malloc ops for later freeing. This is the gkmcore version.
+ */
+/*************************************************************************/
+void gk_gkmcorePush(gk_mcore_t *mcore)
+{
+  gk_gkmcoreAdd(mcore, GK_MOPT_MARK, 0, NULL);
+  /* printf("MCPPUSH:   %zu\n", mcore->cmop-1); */
+}
+
+
+/*************************************************************************/
+/*! Unwinds the stack of malloc ops: every mop above the most recent marker
+    is released and the marker itself is taken off the stack as well. */
+/*************************************************************************/
+void gk_mcorePop(gk_mcore_t *mcore)
+{
+  gk_mop_t *mop;
+
+  while (mcore->cmop > 0) {
+    /* take the top-most entry off the stack */
+    mop = mcore->mops + (--mcore->cmop);
+
+    if (mop->type == GK_MOPT_MARK) /* push marker */
+      return;
+
+    switch (mop->type) {
+      case GK_MOPT_CORE: /* core free */
+        /* more bytes than the core currently has in use cannot be returned */
+        if (mcore->corecpos < mop->nbytes)
+          errexit("Internal Error: wspace's core is about to be over-freed [%zu, %zu, %zd]\n",
+              mcore->coresize, mcore->corecpos, mop->nbytes);
+
+        mcore->cur_callocs -= mop->nbytes;
+        mcore->corecpos    -= mop->nbytes;
+        break;
+
+      case GK_MOPT_HEAP: /* heap free */
+        gk_free((void **)&mop->ptr, LTERM);
+        mcore->cur_hallocs -= mop->nbytes;
+        break;
+
+      default:
+        gk_errexit(SIGMEM, "Unknown mop type of %d\n", mop->type);
+    }
+  }
+}
+
+
+/*************************************************************************/
+/*! Unwinds the gkmcore stack of malloc ops up to and including the most
+    recent marker. This version releases the heap blocks with free()
+    instead of gk_free(). */
+/*************************************************************************/
+void gk_gkmcorePop(gk_mcore_t *mcore)
+{
+  gk_mop_t *mop;
+
+  while (mcore->cmop > 0) {
+    /* take the top-most entry off the stack */
+    mop = mcore->mops + (--mcore->cmop);
+
+    if (mop->type == GK_MOPT_MARK) /* push marker */
+      return;
+
+    /* anything other than a heap mop is reported as an error */
+    if (mop->type == GK_MOPT_HEAP) {
+      /* heap free */
+      free(mop->ptr);
+      mcore->cur_hallocs -= mop->nbytes;
+    }
+    else {
+      gk_errexit(SIGMEM, "Unknown mop type of %d\n", mop->type);
+    }
+  }
+}
+
+
+/*************************************************************************/
+/*! Adds a mop at the end of the list and updates the core/heap statistics.
+    mops is gk_malloc'ed and gk_free'd, so it grows through gk_realloc(). */
+/*************************************************************************/
+void gk_mcoreAdd(gk_mcore_t *mcore, int type, size_t nbytes, void *ptr)
+{
+  if (mcore->cmop == mcore->nmops) { /* the list is full; double it */
+    mcore->nmops *= 2;
+    mcore->mops = (gk_mop_t *)gk_realloc(mcore->mops, mcore->nmops*sizeof(gk_mop_t), "gk_mcoreAdd: mops");
+    if (mcore->mops == NULL) 
+      gk_errexit(SIGMEM, "***Memory allocation for gkmcore failed.\n");
+  }
+
+  mcore->mops[mcore->cmop].type   = type;
+  mcore->mops[mcore->cmop].nbytes = nbytes;
+  mcore->mops[mcore->cmop].ptr    = ptr;
+  mcore->cmop++;
+
+  switch (type) {
+    case GK_MOPT_MARK: /* markers carry no statistics */
+      break;
+
+    case GK_MOPT_CORE:
+      mcore->num_callocs++;
+      mcore->size_callocs += nbytes;
+      mcore->cur_callocs  += nbytes;
+      if (mcore->max_callocs < mcore->cur_callocs)
+        mcore->max_callocs = mcore->cur_callocs;
+      break;
+
+    case GK_MOPT_HEAP:
+      mcore->num_hallocs++;
+      mcore->size_hallocs += nbytes;
+      mcore->cur_hallocs  += nbytes;
+      if (mcore->max_hallocs < mcore->cur_hallocs)
+        mcore->max_hallocs = mcore->cur_hallocs;
+      break;
+    default:
+      gk_errexit(SIGMEM, "Incorrect mcore type operation.\n");
+  }
+}
+
+
+/*************************************************************************/
+/*! Appends a mop to the list of the gkmcore, doubling the list when it is
+    full, and updates the heap statistics. This is the gkmcore version. */
+/*************************************************************************/
+void gk_gkmcoreAdd(gk_mcore_t *mcore, int type, size_t nbytes, void *ptr)
+{
+  gk_mop_t *slot;
+
+  if (mcore->cmop == mcore->nmops) {
+    mcore->nmops *= 2;
+    mcore->mops = realloc(mcore->mops, mcore->nmops*sizeof(gk_mop_t));
+    if (mcore->mops == NULL) 
+      gk_errexit(SIGMEM, "***Memory allocation for gkmcore failed.\n");
+  }
+
+  /* record the mop in the first free slot */
+  slot = mcore->mops + mcore->cmop++;
+  slot->type   = type;
+  slot->nbytes = nbytes;
+  slot->ptr    = ptr;
+
+  if (type == GK_MOPT_MARK) /* markers carry no statistics */
+    return;
+  if (type != GK_MOPT_HEAP) {
+    gk_errexit(SIGMEM, "Incorrect mcore type operation.\n");
+    return;
+  }
+
+  mcore->num_hallocs++;
+  mcore->size_hallocs += nbytes;
+  mcore->cur_hallocs  += nbytes;
+  if (mcore->max_hallocs < mcore->cur_hallocs)
+    mcore->max_hallocs = mcore->cur_hallocs;
+}
+
+
+/*************************************************************************/
+/*! This function deletes the mop that records the supplied pointer. The
+    list is scanned from its top; reaching a marker first is an error.
+    The mop has to be a heap allocation, otherwise it fails violently. */
+/*************************************************************************/
+void gk_mcoreDel(gk_mcore_t *mcore, void *ptr)
+{
+  ssize_t i; /* signed, and wide enough for cmop, which a plain int is not */
+
+  for (i=(ssize_t)mcore->cmop-1; i>=0; i--) {
+    if (mcore->mops[i].type == GK_MOPT_MARK)
+      gk_errexit(SIGMEM, "Could not find pointer %p in mcore\n", ptr);
+
+    if (mcore->mops[i].ptr == ptr) {
+      if (mcore->mops[i].type != GK_MOPT_HEAP)
+        gk_errexit(SIGMEM, "Trying to delete a non-HEAP mop.\n");
+      /* drop the entry by moving the top-most mop into its slot */
+      mcore->cur_hallocs -= mcore->mops[i].nbytes;
+      mcore->mops[i] = mcore->mops[--mcore->cmop];
+      return;
+    }
+  }
+
+  gk_errexit(SIGMEM, "mcoreDel should never have been here!\n");
+}
+
+
+/*************************************************************************/
+/*! This function deletes the mop that records the supplied pointer. The
+    list is scanned from its top; reaching a marker first is an error.
+    The mop has to be a heap allocation, otherwise it fails violently.
+    This is the gkmcore version. */
+/*************************************************************************/
+void gk_gkmcoreDel(gk_mcore_t *mcore, void *ptr)
+{
+  ssize_t i; /* signed, and wide enough for cmop, which a plain int is not */
+
+  for (i=(ssize_t)mcore->cmop-1; i>=0; i--) {
+    if (mcore->mops[i].type == GK_MOPT_MARK)
+      gk_errexit(SIGMEM, "Could not find pointer %p in mcore\n", ptr);
+
+    if (mcore->mops[i].ptr == ptr) {
+      if (mcore->mops[i].type != GK_MOPT_HEAP)
+        gk_errexit(SIGMEM, "Trying to delete a non-HEAP mop.\n");
+      /* drop the entry by moving the top-most mop into its slot */
+      mcore->cur_hallocs -= mcore->mops[i].nbytes;
+      mcore->mops[i] = mcore->mops[--mcore->cmop];
+      return;
+    }
+  }
+
+  gk_errexit(SIGMEM, "gkmcoreDel should never have been here!\n");
+}
+
diff --git a/memory.c b/memory.c
new file mode 100644
index 0000000..e6dc99c
--- /dev/null
+++ b/memory.c
@@ -0,0 +1,307 @@
+/*!
+\file  memory.c
+\brief This file contains various allocation routines 
+
+The allocation routines included are for 1D and 2D arrays of the 
+most datatypes that GKlib support. Many of these routines are 
+defined with the help of the macros in gk_memory.h. These macros 
+can be used to define other memory allocation routines.
+
+\date   Started 4/3/2007
+\author George
+\version\verbatim $Id: memory.c 21050 2017-05-25 03:53:58Z karypis $ \endverbatim
+*/
+
+
+#include <GKlib.h>
+
+/* Thread-local (__thread) mcore that tracks all heap allocations; NULL while tracking is off */
+static __thread gk_mcore_t *gkmcore = NULL;
+
+
+/*************************************************************************/
+/*! Define the set of memory allocation routines for each data type */
+/**************************************************************************/
+GK_MKALLOC(gk_c,    char)
+GK_MKALLOC(gk_i,    int)
+GK_MKALLOC(gk_i8,   int8_t)
+GK_MKALLOC(gk_i16,  int16_t)
+GK_MKALLOC(gk_i32,  int32_t)
+GK_MKALLOC(gk_i64,  int64_t)
+GK_MKALLOC(gk_ui8,  uint8_t)
+GK_MKALLOC(gk_ui16, uint16_t)
+GK_MKALLOC(gk_ui32, uint32_t)
+GK_MKALLOC(gk_ui64, uint64_t)
+GK_MKALLOC(gk_z,    ssize_t)
+GK_MKALLOC(gk_zu,   size_t)
+GK_MKALLOC(gk_f,    float)
+GK_MKALLOC(gk_d,    double)
+GK_MKALLOC(gk_idx,  gk_idx_t)
+/* ... and for each of the *kv_t types (presumably the key-value pairs) */
+GK_MKALLOC(gk_ckv,   gk_ckv_t)
+GK_MKALLOC(gk_ikv,   gk_ikv_t)
+GK_MKALLOC(gk_i8kv,  gk_i8kv_t)
+GK_MKALLOC(gk_i16kv, gk_i16kv_t)
+GK_MKALLOC(gk_i32kv, gk_i32kv_t)
+GK_MKALLOC(gk_i64kv, gk_i64kv_t)
+GK_MKALLOC(gk_zkv,   gk_zkv_t)
+GK_MKALLOC(gk_zukv,  gk_zukv_t)
+GK_MKALLOC(gk_fkv,   gk_fkv_t)
+GK_MKALLOC(gk_dkv,   gk_dkv_t)
+GK_MKALLOC(gk_skv,   gk_skv_t)
+GK_MKALLOC(gk_idxkv, gk_idxkv_t)
+
+
+
+
+
+
+/*************************************************************************/
+/*! Allocates an ndim1 x ndim2 matrix of elmlen-byte elements as an array of
+    row pointers. *r_matrix is NULL, and nothing leaks, if it fails. */
+/*************************************************************************/
+void gk_AllocMatrix(void ***r_matrix, size_t elmlen, size_t ndim1, size_t ndim2)
+{
+  size_t i, j;
+  void **matrix;
+
+  *r_matrix = NULL;
+  if ((matrix = (void **)gk_malloc(ndim1*sizeof(void *), "gk_AllocMatrix: matrix")) == NULL)
+    return;
+
+  for (i=0; i<ndim1; i++) {
+    if ((matrix[i] = (void *)gk_malloc(ndim2*elmlen, "gk_AllocMatrix: matrix[i]")) == NULL) {
+      for (j=0; j<i; j++) 
+        gk_free((void **)&matrix[j], LTERM);
+      gk_free((void **)&matrix, LTERM); /* the row-pointer array goes too */
+      return;
+    }
+  }
+
+  *r_matrix = matrix;
+}
+
+
+/*************************************************************************/
+/*! Releases the ndim1 rows of a matrix and then its array of row pointers;
+    *r_matrix is NULL afterwards. ndim2 is not used. */
+/*************************************************************************/
+void gk_FreeMatrix(void ***r_matrix, size_t ndim1, size_t ndim2)
+{
+  void **rows = *r_matrix;
+  size_t r = 0;
+
+  if (rows == NULL)
+    return;
+
+  while (r < ndim1) {
+    gk_free((void **)&rows[r], LTERM);
+    r++;
+  }
+  gk_free((void **)r_matrix, LTERM); 
+}
+
+
+/*************************************************************************/
+/*! Turns on the tracking of heap allocations (the tracker is created if it
+    does not exist) and pushes a marker. Returns 1 on success, 0 otherwise. */
+/*************************************************************************/
+int gk_malloc_init()
+{
+  if (gkmcore == NULL) {
+    gkmcore = gk_gkmcoreCreate();
+    if (gkmcore == NULL)
+      return 0;
+  }
+
+  /* the marker is where the next gk_malloc_cleanup() stops freeing */
+  gk_gkmcorePush(gkmcore);
+  return 1;
+}
+
+
+/*************************************************************************/
+/*! Frees the memory allocated since the last gk_malloc_init() and destroys
+    the tracker once its list of mops is empty. No-op if tracking is off. */
+/*************************************************************************/
+void gk_malloc_cleanup(int showstats)
+{
+  if (gkmcore == NULL)
+    return;
+
+  gk_gkmcorePop(gkmcore);
+  if (gkmcore->cmop != 0) /* entries remain on the list; keep the tracker */
+    return;
+  gk_gkmcoreDestroy(&gkmcore, showstats);
+  gkmcore = NULL;
+}
+
+
+/*************************************************************************/
+/*! This function is a wrapper around malloc that provides the following
+    enhancements over malloc:
+    * It always allocates at least one byte of memory, even if 0 bytes are
+      requested, so that a zero-sized request does not lead to NULL.
+    * It records the allocation in the heap tracker when tracking is on.
+    * It reports a failed allocation through gk_errexit().
+    Note that the allocated memory is NOT zeroed-out.
+*/
+/**************************************************************************/
+void *gk_malloc(size_t nbytes, char *msg)
+{
+  void *mem;
+
+  /* Force mallocs to actually allocate some memory */
+  nbytes = (nbytes == 0 ? 1 : nbytes);
+
+  mem = (void *)malloc(nbytes);
+  if (mem != NULL) {
+    /* add this memory allocation */
+    if (gkmcore != NULL)
+      gk_gkmcoreAdd(gkmcore, GK_MOPT_HEAP, nbytes, mem);
+    return mem;
+  }
+
+  /* the allocation failed; report the tracked usage and the request */
+  fprintf(stderr, "   Current memory used:  %10zu bytes\n", gk_GetCurMemoryUsed());
+  fprintf(stderr, "   Maximum memory used:  %10zu bytes\n", gk_GetMaxMemoryUsed());
+  gk_errexit(SIGMEM, "***Memory allocation failed for %s. Requested size: %zu bytes", 
+      msg, nbytes);
+  return NULL;
+}
+
+
+/*************************************************************************
+* Wrapper around realloc: a size of 0 is bumped to 1 byte, the heap tracker
+* is kept in sync, and a failure is reported through gk_errexit()
+**************************************************************************/
+void *gk_realloc(void *oldptr, size_t nbytes, char *msg)
+{
+  void *mem;
+
+  nbytes = (nbytes == 0 ? 1 : nbytes); /* Force an actual allocation */
+
+  /* remove this memory de-allocation */
+  if (oldptr != NULL && gkmcore != NULL)
+    gk_gkmcoreDel(gkmcore, oldptr);
+
+  mem = (void *)realloc(oldptr, nbytes);
+  if (mem != NULL) {
+    /* add this memory allocation */
+    if (gkmcore != NULL)
+      gk_gkmcoreAdd(gkmcore, GK_MOPT_HEAP, nbytes, mem);
+    return mem;
+  }
+
+  fprintf(stderr, "   Maximum memory used: %10zu bytes\n", gk_GetMaxMemoryUsed());
+  fprintf(stderr, "   Current memory used: %10zu bytes\n", gk_GetCurMemoryUsed());
+  gk_errexit(SIGMEM, "***Memory realloc failed for %s. " "Requested size: %zu bytes", 
+      msg, nbytes);
+  return NULL;
+}
+
+
+/*************************************************************************
+* This function is a wrapper around free that allows multiple pointers.
+* It takes the addresses of the pointers to free, terminated by LTERM.
+* Every non-NULL pointer is removed from the heap tracker (when tracking
+* is on), freed, and then set to NULL.
+**************************************************************************/
+void gk_free(void **ptr1,...)
+{
+  va_list plist;
+  void **ptr;
+
+  va_start(plist, ptr1);
+
+  /* ptr1 is treated exactly like the variadic arguments that follow it,
+     so a bare gk_free(LTERM) is simply a no-op */
+  for (ptr=ptr1; ptr != LTERM; ptr=va_arg(plist, void **)) {
+    if (*ptr != NULL) {
+      /* remove this memory de-allocation first: the value of a pointer
+         must not be used anymore once it has been handed to free() */
+      if (gkmcore != NULL) 
+        gk_gkmcoreDel(gkmcore, *ptr);
+
+      free(*ptr);
+    }
+    /* leave no dangling pointer behind */
+    *ptr = NULL;
+  }
+
+  va_end(plist);
+}
+
+
+/*************************************************************************
+* This function returns the current amount of tracked heap memory, i.e.,
+* the cur_hallocs of the heap tracker, or 0 when tracking is off
+**************************************************************************/
+size_t gk_GetCurMemoryUsed()
+{
+  /* without a tracker nothing has been recorded */
+  if (gkmcore != NULL)
+    return gkmcore->cur_hallocs;
+  return 0;
+}
+
+
+/*************************************************************************
+* This function returns the maximum amount of tracked heap memory, i.e.,
+* the max_hallocs of the heap tracker, or 0 when tracking is off
+**************************************************************************/
+size_t gk_GetMaxMemoryUsed()
+{
+  /* without a tracker nothing has been recorded */
+  if (gkmcore != NULL)
+    return gkmcore->max_hallocs;
+  return 0;
+}
+
+
+/*************************************************************************/
+/*! This function returns the VmSize and VmRSS of the calling process as
+    read from /proc/<pid>/statm, i.e., in pages (see proc(5)), not bytes. */
+/*************************************************************************/
+void gk_GetVMInfo(size_t *vmsize, size_t *vmrss)
+{
+  FILE *fp;
+  char fname[1024];
+
+  /* bounded formatting; getpid() returns a pid_t, which %d wants as an int */
+  snprintf(fname, sizeof(fname), "/proc/%d/statm", (int)getpid());
+
+  fp = gk_fopen(fname, "r", "proc/pid/statm");
+  if (fscanf(fp, "%zu %zu", vmsize, vmrss) != 2)
+    errexit("Failed to read two values from %s\n", fname);
+  gk_fclose(fp);
+
+  /* To get bytes, a caller can scale both values by sysconf(_SC_PAGESIZE). */
+
+  return;
+}
+
+
+/*************************************************************************/
+/*! Returns the VmPeak field of /proc/self/status multiplied by 1024 (the
+    field is in kB, see proc(5)), or 0 if the file or field is missing. */
+/*************************************************************************/
+size_t gk_GetProcVmPeak()
+{
+  FILE *fp;
+  char buf[128];
+  size_t peak=0;
+
+  if (!gk_fexists("/proc/self/status"))
+    return peak;
+
+  fp = gk_fopen("/proc/self/status", "r", "proc/self/status");
+  while (fgets(buf, 128, fp) != NULL) {
+    if (strncmp(buf, "VmPeak:", 7) != 0)
+      continue;
+    peak = atoll(buf+8)*1024;
+    break;
+  }
+  gk_fclose(fp);
+  return peak;
+}
diff --git a/pqueue.c b/pqueue.c
new file mode 100644
index 0000000..2fb8515
--- /dev/null
+++ b/pqueue.c
@@ -0,0 +1,25 @@
+/*!
+\file  pqueue.c
+\brief This file implements various max-priority queues.
+
+The priority queues are generated using the GK_MKPQUEUE macro.
+
+\date   Started 3/27/2007
+\author George
+\version\verbatim $Id: pqueue.c 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
+*/
+
+#include <GKlib.h>
+
+
+/*************************************************************************/
+/*! Create the various max priority queues; key_gt is the key test that is passed to GK_MKPQUEUE */
+/*************************************************************************/
+#define key_gt(a, b) ((a) > (b))
+GK_MKPQUEUE(gk_ipq,   gk_ipq_t,   gk_ikv_t,   int,      gk_idx_t, gk_ikvmalloc,   INT_MAX,    key_gt)
+GK_MKPQUEUE(gk_i32pq, gk_i32pq_t, gk_i32kv_t, int32_t,  gk_idx_t, gk_i32kvmalloc, INT32_MAX,  key_gt)
+GK_MKPQUEUE(gk_i64pq, gk_i64pq_t, gk_i64kv_t, int64_t,  gk_idx_t, gk_i64kvmalloc, INT64_MAX,  key_gt)
+GK_MKPQUEUE(gk_fpq,   gk_fpq_t,   gk_fkv_t,   float,    gk_idx_t, gk_fkvmalloc,   FLT_MAX,    key_gt)
+GK_MKPQUEUE(gk_dpq,   gk_dpq_t,   gk_dkv_t,   double,   gk_idx_t, gk_dkvmalloc,   DBL_MAX,    key_gt)
+GK_MKPQUEUE(gk_idxpq, gk_idxpq_t, gk_idxkv_t, gk_idx_t, gk_idx_t, gk_idxkvmalloc, GK_IDX_MAX, key_gt)
+#undef key_gt
diff --git a/random.c b/random.c
new file mode 100644
index 0000000..3698614
--- /dev/null
+++ b/random.c
@@ -0,0 +1,136 @@
+/*!
+\file  
+\brief Various routines for providing portable 32 and 64 bit random number
+       generators.
+
+\date   Started 5/17/2007
+\author George
+\version\verbatim $Id: random.c 18796 2015-06-02 11:39:45Z karypis $ \endverbatim
+*/
+
+#include <GKlib.h>
+
+
+/*************************************************************************/
+/*! Create the various random number functions, one GK_MKRANDOM expansion per data type */
+/*************************************************************************/
+GK_MKRANDOM(gk_c,   size_t, char)
+GK_MKRANDOM(gk_i,   size_t, int)
+GK_MKRANDOM(gk_i32, size_t, int32_t)
+GK_MKRANDOM(gk_f,   size_t, float)
+GK_MKRANDOM(gk_d,   size_t, double)
+GK_MKRANDOM(gk_idx, size_t, gk_idx_t)
+GK_MKRANDOM(gk_z,   size_t, ssize_t)
+GK_MKRANDOM(gk_zu,  size_t, size_t)
+
+
+
+/*************************************************************************/
+/*! GKlib's built in random number generator for portability across 
+    different architectures */
+/*************************************************************************/
+#ifdef USE_GKRAND
+/* 
+   A C-program for MT19937-64 (2004/9/29 version).
+   Coded by Takuji Nishimura and Makoto Matsumoto.
+
+   This is a 64-bit version of Mersenne Twister pseudorandom number
+   generator.
+
+   Before using, initialize the state by using init_genrand64(seed)  
+   or init_by_array64(init_key, key_length).
+
+   Copyright (C) 2004, Makoto Matsumoto and Takuji Nishimura,
+   All rights reserved.                          
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#define NN 312 /* Number of 64-bit words in the state vector */
+#define MM 156 /* Offset between the words combined by the recurrence in gk_randint64() */
+#define MATRIX_A 0xB5026F5AA96619E9ULL
+#define UM 0xFFFFFFFF80000000ULL /* Most significant 33 bits */
+#define LM 0x7FFFFFFFULL /* Least significant 31 bits */
+
+
+/* The array for the state vector */
+static uint64_t mt[NN]; 
+/* Index of the next unused word of mt; mti==NN+1 means that mt has not been seeded yet */
+static int mti=NN+1; 
+#endif /* USE_GKRAND */
+
+/* Seeds the generator: with USE_GKRAND it fills mt[] from seed, otherwise it seeds rand() */
+void gk_randinit(uint64_t seed)
+{
+#ifdef USE_GKRAND
+  mt[0] = seed;
+  for (mti=1; mti<NN; mti++) /* mti itself is the loop index, so it is left at NN */
+    mt[mti] = (6364136223846793005ULL * (mt[mti-1] ^ (mt[mti-1] >> 62)) + mti);
+#else
+  srand((unsigned int) seed); /* seed is narrowed to an unsigned int here */
+#endif
+}
+
+
+/* Generates a random number. With USE_GKRAND the top bit is masked off, so the result is on the
+   [0, 2^63-1]-interval; otherwise two rand() values are combined as (first << 32) | second. */
+uint64_t gk_randint64(void)
+{
+#ifdef USE_GKRAND
+  int i;
+  unsigned long long x;
+  static uint64_t mag01[2]={0ULL, MATRIX_A};
+
+  if (mti >= NN) { /* generate NN words at one time */
+    /* if gk_randinit() has not been called, */
+    /* a default initial seed is used     */
+    if (mti == NN+1) 
+      gk_randinit(5489ULL); 
+    for (i=0; i<NN-MM; i++) {
+      x = (mt[i]&UM)|(mt[i+1]&LM);
+      mt[i] = mt[i+MM] ^ (x>>1) ^ mag01[(int)(x&1ULL)];
+    }
+    for (; i<NN-1; i++) {
+      x = (mt[i]&UM)|(mt[i+1]&LM);
+      mt[i] = mt[i+(MM-NN)] ^ (x>>1) ^ mag01[(int)(x&1ULL)];
+    }
+    x = (mt[NN-1]&UM)|(mt[0]&LM);
+    mt[NN-1] = mt[MM-1] ^ (x>>1) ^ mag01[(int)(x&1ULL)];
+    mti = 0;
+  }
+
+  x = mt[mti++];
+  x ^= (x >> 29) & 0x5555555555555555ULL;
+  x ^= (x << 17) & 0x71D67FFFEDA60000ULL;
+  x ^= (x << 37) & 0xFFF7EEE000000000ULL;
+  x ^= (x >> 43);
+  return x & 0x7FFFFFFFFFFFFFFF;
+#else
+  /* two statements: inside one expression the order of the two calls is unspecified */
+  uint64_t hi = (uint64_t) rand();
+  uint64_t lo = (uint64_t) rand();
+  return (uint64_t)(hi << 32 | lo);
+#endif
+}
+
+/* generates a random number: the low 31 bits of gk_randint64() with USE_GKRAND, rand() otherwise */
+uint32_t gk_randint32(void)
+{
+#ifdef USE_GKRAND
+  return (uint32_t)(gk_randint64() & 0x7FFFFFFF);
+#else
+  return (uint32_t)rand();
+#endif
+}
+
+
diff --git a/rw.c b/rw.c
new file mode 100644
index 0000000..7cd4391
--- /dev/null
+++ b/rw.c
@@ -0,0 +1,103 @@
+/*!
+ * \file 
+ *
+ * \brief Various routines that perform random-walk based operations
+          on graphs stored as gk_csr_t matrices.
+ *
+ * \author George Karypis
+ * \version\verbatim $Id: rw.c 11078 2011-11-12 00:20:44Z karypis $ \endverbatim
+ */
+
+#include <GKlib.h>
+
+
+/*************************************************************************/
+/*! Computes the (personalized) page-rank of the vertices in a graph.
+
+  \param mat is the matrix storing the graph.
+  \param lamda is the probability of following an out-link; a restart has probability 1-lamda.
+  \param eps is the error tolerance for convergence.
+  \param max_niter is the maximum number of allowed iterations.
+  \param pr on entry stores the restart distribution of the vertices. 
+         This allows for the computation of personalized page-rank scores 
+         by appropriately setting that parameter. 
+         On return, pr stores the computed page ranks.
+ 
+  \returns the number of iterations that were performed (at most max_niter).
+*/
+/**************************************************************************/
+int gk_rw_PageRank(gk_csr_t *mat, float lamda, float eps, int max_niter, float *pr)
+{
+  ssize_t i, j, iter, nrows;
+  double *rscale, *prold, *prnew, *prtmp;
+  double fromsinks, error;
+  ssize_t *rowptr;
+  int *rowind;
+  float *rowval;
+
+  nrows  = mat->nrows;
+  rowptr = mat->rowptr;
+  rowind = mat->rowind;
+  rowval = mat->rowval;
+
+  prold  = gk_dsmalloc(nrows, 0, "gk_rw_PageRank: prold");
+  prnew  = gk_dsmalloc(nrows, 0, "gk_rw_PageRank: prnew");
+  rscale = gk_dsmalloc(nrows, 0, "gk_rw_PageRank: rscale");
+
+  /* compute the scaling factors to get adjacency weights into transition 
+     probabilities; rows whose weights sum to 0 (sinks) keep a scale of 0 */
+  for (i=0; i<nrows; i++) {
+    for (j=rowptr[i]; j<rowptr[i+1]; j++)
+      rscale[i] += rowval[j];
+    if (rscale[i] > 0)
+      rscale[i] = 1.0/rscale[i];
+  }
+
+  /* the restart distribution is the initial pr scores */
+  for (i=0; i<nrows; i++)
+    prnew[i] = pr[i];
+
+  /* get into the PR iteration */
+  for (iter=0; iter<max_niter; iter++) {
+    gk_SWAP(prnew, prold, prtmp);
+    gk_dset(nrows, 0.0, prnew);
+
+    /* determine the total current PR score of the sinks so that you 
+       can distribute them to all nodes according to the restart 
+       distribution. */
+    for (fromsinks=0.0, i=0; i<nrows; i++) {
+      if (rscale[i] == 0) 
+        fromsinks += prold[i];
+    }
+
+    /* push random-walk scores to the outlinks */
+    for (i=0; i<nrows; i++) {
+      for (j=rowptr[i]; j<rowptr[i+1]; j++)
+        prnew[rowind[j]] += prold[i]*rscale[i]*rowval[j];
+    }
+
+    /* apply the restart conditions */
+    for (i=0; i<nrows; i++) {
+      prnew[i] = lamda*(fromsinks*pr[i]+prnew[i]) + (1.0-lamda)*pr[i];
+    }
+
+    /* compute the error as the largest absolute change of a score */
+    for (error=0.0, i=0; i<nrows; i++) 
+      error = (fabs(prnew[i]-prold[i]) > error ? fabs(prnew[i]-prold[i]) : error);
+
+    /* printf("nrm1: %le  maxfabserr: %le\n", gk_dsum(nrows, prnew, 1), error); */
+
+    if (error < eps)
+      break;
+  }
+
+  /* store the computed pr scores into pr for output */
+  for (i=0; i<nrows; i++)
+    pr[i] = prnew[i];
+
+  gk_free((void **)&prnew, &prold, &rscale, LTERM);
+  /* a break leaves iter one short of the number of sweeps; a full run does not */
+  return (int)(iter < max_niter ? iter+1 : iter);
+
+}
+
diff --git a/scripts/gexpand.pl b/scripts/gexpand.pl
new file mode 100644
index 0000000..2b82134
--- /dev/null
+++ b/scripts/gexpand.pl
@@ -0,0 +1,53 @@
+#!/usr/bin/perl -w
+# Expands the graph in <gfile> ("nvtxs nedges" header, then one line of 1-based
+# neighbors per vertex) into <ncopies> layers; prints the 0-based edge list.
+use strict;
+
+die "Usage $0 <gfile> <ncopies>\n" unless @ARGV == 2;
+
+my $filein  = shift(@ARGV);
+my $ncopies = shift(@ARGV);
+
+# 3-argument open: the file name is never taken as a mode or as a command
+open(my $fpin, '<', $filein) or die "Could not open $filein. $!\n";
+
+my $header = <$fpin>;
+die "Could not read the header line of $filein.\n" unless defined($header);
+chomp($header);
+my ($nvtxs) = split(' ', $header);
+
+my $u = 1;
+while (<$fpin>) {
+  chomp($_);
+  my @edges = split(' ', $_);
+
+  # put the within layer edges
+  foreach my $v (@edges) {
+    next if $v < $u;
+    for (my $i=0; $i<$ncopies; $i++) {
+      printf("%d %d\n", $i*$nvtxs+$u-1, $i*$nvtxs+$v-1);
+      printf("%d %d\n", $i*$nvtxs+$v-1, $i*$nvtxs+$u-1);
+    }
+  }
+
+  # put the vertex across layer edges
+  for (my $i=0; $i<$ncopies-1; $i++) {
+    printf("%d %d\n", $i*$nvtxs+$u-1, ($i+1)*$nvtxs+$u-1);
+    printf("%d %d\n", ($i+1)*$nvtxs+$u-1, $i*$nvtxs+$u-1);
+  }
+
+  # put the adjacent across layer edges
+  for (my $i=0; $i<$ncopies-1; $i++) {
+    my $j=0;
+    foreach my $v (@edges) {
+      $j++;
+      next if (($j+$i)%2 == 0);
+      printf("%d %d\n", $i*$nvtxs+$u-1, ($i+1)*$nvtxs+$v-1);
+      printf("%d %d\n", ($i+1)*$nvtxs+$v-1, $i*$nvtxs+$u-1);
+    }
+  }
+
+  $u++;
+}
+
+close($fpin);
diff --git a/seq.c b/seq.c
new file mode 100644
index 0000000..f267a3e
--- /dev/null
+++ b/seq.c
@@ -0,0 +1,174 @@
+/*
+ *
+ * Sequence handler library by Huzefa Rangwala
+ * Date : 03.01.2007
+ *
+ *
+ *
+ */
+
+
+#include <GKlib.h>
+
+
+
+
+/*********************************************************/
+/*! \brief Initializes the <tt>gk_seq_t</tt> variable
+
+Puts the structure into the empty state: zero length, zero symbols and
+no attached buffers. Nothing is allocated or freed by this function.
+
+\param seq is a pointer to the gk_seq_t to initialize
+\returns nothing
+*/
+/***********************************************************************/
+
+void gk_seq_init(gk_seq_t *seq)
+{
+    seq->len      = 0;
+    seq->nsymbols = 0;  /* read by gk_seq_free(); must not stay indeterminate */
+    seq->sequence = NULL;
+
+    seq->pssm = NULL;
+    seq->psfm = NULL;
+
+    seq->name = NULL;
+
+}
+
+/***********************************************************************/
+/*! \brief Builds the two-way symbol <-> index map for an alphabet
+
+\param    alphabet is the string of symbols (amino acids, nucleotides, ...);
+          its i-th character is given index i
+\returns  a newly allocated gk_i2cc2i_t; unused slots of both tables hold -1
+*/
+/*********************************************************************/
+
+gk_i2cc2i_t *gk_i2cc2i_create_common(char *alphabet)
+{
+    int nsymbols;
+    gk_idx_t i;
+    gk_i2cc2i_t *t;
+
+    nsymbols = strlen(alphabet);
+    t        = gk_malloc(sizeof(gk_i2cc2i_t),"gk_i2c_create_common");
+    t->n     = nsymbols;
+    t->i2c   = gk_cmalloc(256, "gk_i2c_create_common");
+    t->c2i   = gk_imalloc(256, "gk_i2c_create_common");
+
+    /* -1 marks "no symbol" / "no index" in the two 256-entry tables */
+    gk_cset(256, -1, t->i2c);
+    gk_iset(256, -1, t->c2i);
+
+    for(i=0;i<nsymbols;i++){
+        t->i2c[i] = alphabet[i];
+        /* index through unsigned char: a plain (possibly signed) char would
+           address memory before the start of c2i for bytes >= 0x80 */
+        t->c2i[(unsigned char)alphabet[i]] = i;
+    }
+
+    return t;
+}
+
+
+/*********************************************************************/
+/*! \brief This function reads a pssm in the format of gkmod pssm
+
+\param filename is the name of the pssm file
+\returns a newly allocated gk_seq_t filled with the sequence, pssm and psfm
+*/
+/********************************************************************/
+gk_seq_t *gk_seq_ReadGKMODPSSM(char *filename)
+{
+    gk_seq_t *seq;
+    gk_idx_t i, j, ii;
+    size_t ntokens, nbytes, len;
+    FILE *fpin;
+    int col;
+
+    gk_Tokens_t tokens;
+    static char *AAORDER = "ARNDCQEGHILKMFPSTWYVBZX*";
+    static int PSSMWIDTH = 20;
+    char *header, line[MAXLINELEN];
+    gk_i2cc2i_t *converter;
+
+    header = gk_cmalloc(PSSMWIDTH, "gk_seq_ReadGKMODPSSM: header");
+    converter = gk_i2cc2i_create_common(AAORDER);
+
+    gk_getfilestats(filename, &len, &ntokens, NULL, &nbytes);
+    len --;
+
+    seq = gk_malloc(sizeof(gk_seq_t),"gk_seq_ReadGKMODPSSM");
+    gk_seq_init(seq);
+
+    seq->len = len;
+    seq->sequence = gk_imalloc(len, "gk_seq_ReadGKMODPSSM");
+    seq->pssm     = gk_iAllocMatrix(len, PSSMWIDTH, 0, "gk_seq_ReadGKMODPSSM");
+    seq->psfm     = gk_iAllocMatrix(len, PSSMWIDTH, 0, "gk_seq_ReadGKMODPSSM");
+
+    seq->nsymbols = PSSMWIDTH;
+    seq->name     = gk_getbasename(filename);
+
+    fpin = gk_fopen(filename,"r","gk_seq_ReadGKMODPSSM");
+
+    /* Read the header line */
+    if (fgets(line, MAXLINELEN-1, fpin) == NULL)
+      errexit("Unexpected end of file: %s\n", filename);
+    gk_strtoupper(line);
+    gk_strtokenize(line, " \t\n", &tokens);
+
+    /* every header symbol must select one of the PSSMWIDTH matrix columns */
+    for (i=0; i<PSSMWIDTH; i++) {
+        header[i] = tokens.list[i][0];
+        col = converter->c2i[(unsigned char)header[i]];
+        if (col < 0 || col >= PSSMWIDTH)
+          errexit("Invalid residue '%c' in the header of %s\n", header[i], filename);
+    }
+
+    gk_freetokenslist(&tokens);
+
+    /* Read the rest of the lines */
+    for (i=0, ii=0; ii<len; ii++) {
+        if (fgets(line, MAXLINELEN-1, fpin) == NULL)
+          errexit("Unexpected end of file: %s\n", filename);
+        gk_strtoupper(line);
+        gk_strtokenize(line, " \t\n", &tokens);
+
+        seq->sequence[i] = converter->c2i[(unsigned char)tokens.list[1][0]];
+        for (j=0; j<PSSMWIDTH; j++) {
+            col = converter->c2i[(unsigned char)header[j]];
+            seq->pssm[i][col] = atoi(tokens.list[2+j]);
+            seq->psfm[i][col] = atoi(tokens.list[2+PSSMWIDTH+j]);
+        }
+        gk_freetokenslist(&tokens);
+        i++;
+    }
+
+    seq->len = i; /* Reset the length if certain characters were skipped */
+
+    /* the converter is only used while parsing: release its tables, then itself */
+    gk_free((void **)&header, &converter->i2c, &converter->c2i, &converter, LTERM);
+    gk_fclose(fpin);
+
+    return seq;
+}
+
+
+/**************************************************************************/
+/*! \brief This function frees the memory allocated to the seq structure.
+ 
+\param   seq is the sequence to release; a NULL pointer is ignored
+\returns nothing
+*/
+/**************************************************************************/
+void gk_seq_free(gk_seq_t *seq)
+{
+    if (seq == NULL)
+      return;
+    gk_iFreeMatrix(&seq->pssm, seq->len, seq->nsymbols);
+    gk_iFreeMatrix(&seq->psfm, seq->len, seq->nsymbols);
+    gk_free((void **)&seq->name, &seq->sequence, LTERM);
+    gk_free((void **) &seq, LTERM);
+}
diff --git a/sort.c b/sort.c
new file mode 100644
index 0000000..f0144ae
--- /dev/null
+++ b/sort.c
@@ -0,0 +1,437 @@
+/*!
+\file  sort.c
+\brief This file contains GKlib's various sorting routines
+
+These routines are implemented using the GK_MKQSORT macro (see gk_mksort.h),
+which is based on GNU's GLIBC qsort() implementation.
+
+Additional sorting routines can be created in the same way that
+these routines were defined.
+
+\date   Started 4/4/07
+\author George
+\version\verbatim $Id: sort.c 21050 2017-05-25 03:53:58Z karypis $ \endverbatim
+*/
+
+#include <GKlib.h>
+
+
+
+/*************************************************************************/
+/*! Sorts the n chars starting at base into increasing order */
+/*************************************************************************/
+void gk_csorti(size_t n, char *base)
+{
+#define gk_csorti_lt(x, y) (*(x) < *(y))
+  GK_MKQSORT(char, base, n, gk_csorti_lt);
+#undef gk_csorti_lt
+}
+
+
+/*************************************************************************/
+/*! Sorts the n chars starting at base into decreasing order */
+/*************************************************************************/
+void gk_csortd(size_t n, char *base)
+{
+#define gk_csortd_gt(x, y) (*(x) > *(y))
+  GK_MKQSORT(char, base, n, gk_csortd_gt);
+#undef gk_csortd_gt
+}
+
+
+/*************************************************************************/
+/*! Sorts the n ints starting at base into increasing order */
+/*************************************************************************/
+void gk_isorti(size_t n, int *base)
+{
+#define gk_isorti_lt(x, y) (*(x) < *(y))
+  GK_MKQSORT(int, base, n, gk_isorti_lt);
+#undef gk_isorti_lt
+}
+
+
+/*************************************************************************/
+/*! Sorts the n ints starting at base into decreasing order */
+/*************************************************************************/
+void gk_isortd(size_t n, int *base)
+{
+#define gk_isortd_gt(x, y) (*(x) > *(y))
+  GK_MKQSORT(int, base, n, gk_isortd_gt);
+#undef gk_isortd_gt
+}
+
+
+/*************************************************************************/
+/*! Sorts the n int32_t values starting at base into increasing order */
+/*************************************************************************/
+void gk_i32sorti(size_t n, int32_t *base)
+{
+#define gk_i32sorti_lt(x, y) (*(x) < *(y))
+  GK_MKQSORT(int32_t, base, n, gk_i32sorti_lt);
+#undef gk_i32sorti_lt
+}
+
+
+/*************************************************************************/
+/*! Sorts the n int32_t values starting at base into decreasing order */
+/*************************************************************************/
+void gk_i32sortd(size_t n, int32_t *base)
+{
+#define gk_i32sortd_gt(x, y) (*(x) > *(y))
+  GK_MKQSORT(int32_t, base, n, gk_i32sortd_gt);
+#undef gk_i32sortd_gt
+}
+
+
+/*************************************************************************/
+/*! Sorts the n int64_t values starting at base into increasing order */
+/*************************************************************************/
+void gk_i64sorti(size_t n, int64_t *base)
+{
+#define gk_i64sorti_lt(x, y) (*(x) < *(y))
+  GK_MKQSORT(int64_t, base, n, gk_i64sorti_lt);
+#undef gk_i64sorti_lt
+}
+
+
+/*************************************************************************/
+/*! Sorts the n uint32_t values starting at base into increasing order */
+/*************************************************************************/
+void gk_ui32sorti(size_t n, uint32_t *base)
+{
+#define gk_ui32sorti_lt(x, y) (*(x) < *(y))
+  GK_MKQSORT(uint32_t, base, n, gk_ui32sorti_lt);
+#undef gk_ui32sorti_lt
+}
+
+
+/*************************************************************************/
+/*! Sorts the n uint32_t values starting at base into decreasing order */
+/*************************************************************************/
+void gk_ui32sortd(size_t n, uint32_t *base)
+{
+#define gk_ui32sortd_gt(x, y) (*(x) > *(y))
+  GK_MKQSORT(uint32_t, base, n, gk_ui32sortd_gt);
+#undef gk_ui32sortd_gt
+}
+
+
+/*************************************************************************/
+/*! Sorts the n uint64_t values starting at base into increasing order */
+/*************************************************************************/
+void gk_ui64sorti(size_t n, uint64_t *base)
+{
+#define gk_ui64sorti_lt(x, y) (*(x) < *(y))
+  GK_MKQSORT(uint64_t, base, n, gk_ui64sorti_lt);
+#undef gk_ui64sorti_lt
+}
+
+
+/*************************************************************************/
+/*! Sorts the n uint64_t values starting at base into decreasing order */
+/*************************************************************************/
+void gk_ui64sortd(size_t n, uint64_t *base)
+{
+#define gk_ui64sortd_gt(x, y) (*(x) > *(y))
+  GK_MKQSORT(uint64_t, base, n, gk_ui64sortd_gt);
+#undef gk_ui64sortd_gt
+}
+
+
+/*************************************************************************/
+/*! Sorts the n int64_t values starting at base into decreasing order */
+/*************************************************************************/
+void gk_i64sortd(size_t n, int64_t *base)
+{
+#define gk_i64sortd_gt(x, y) (*(x) > *(y))
+  GK_MKQSORT(int64_t, base, n, gk_i64sortd_gt);
+#undef gk_i64sortd_gt
+}
+
+
+/*************************************************************************/
+/*! Sorts the n floats starting at base into increasing order */
+/*************************************************************************/
+void gk_fsorti(size_t n, float *base)
+{
+#define gk_fsorti_lt(x, y) (*(x) < *(y))
+  GK_MKQSORT(float, base, n, gk_fsorti_lt);
+#undef gk_fsorti_lt
+}
+
+
+/*************************************************************************/
+/*! Sorts the n floats starting at base into decreasing order */
+/*************************************************************************/
+void gk_fsortd(size_t n, float *base)
+{
+#define gk_fsortd_gt(x, y) (*(x) > *(y))
+  GK_MKQSORT(float, base, n, gk_fsortd_gt);
+#undef gk_fsortd_gt
+}
+
+
+/*************************************************************************/
+/*! Sorts the n doubles starting at base into increasing order */
+/*************************************************************************/
+void gk_dsorti(size_t n, double *base)
+{
+#define gk_dsorti_lt(x, y) (*(x) < *(y))
+  GK_MKQSORT(double, base, n, gk_dsorti_lt);
+#undef gk_dsorti_lt
+}
+
+
+/*************************************************************************/
+/*! Sorts the n doubles starting at base into decreasing order */
+/*************************************************************************/
+void gk_dsortd(size_t n, double *base)
+{
+#define gk_dsortd_gt(x, y) (*(x) > *(y))
+  GK_MKQSORT(double, base, n, gk_dsortd_gt);
+#undef gk_dsortd_gt
+}
+
+
+/*************************************************************************/
+/*! Sorts the n gk_idx_t values starting at base into increasing order */
+/*************************************************************************/
+void gk_idxsorti(size_t n, gk_idx_t *base)
+{
+#define gk_idxsorti_lt(x, y) (*(x) < *(y))
+  GK_MKQSORT(gk_idx_t, base, n, gk_idxsorti_lt);
+#undef gk_idxsorti_lt
+}
+
+
+/*************************************************************************/
+/*! Sorts the n gk_idx_t values starting at base into decreasing order */
+/*************************************************************************/
+void gk_idxsortd(size_t n, gk_idx_t *base)
+{
+#define gk_idxsortd_gt(x, y) (*(x) > *(y))
+  GK_MKQSORT(gk_idx_t, base, n, gk_idxsortd_gt);
+#undef gk_idxsortd_gt
+}
+
+
+
+
+/*************************************************************************/
+/*! Sorts the n gk_ckv_t entries starting at base by increasing key */
+/*************************************************************************/
+void gk_ckvsorti(size_t n, gk_ckv_t *base)
+{
+#define gk_ckvsorti_lt(x, y) ((x)->key < (y)->key)
+  GK_MKQSORT(gk_ckv_t, base, n, gk_ckvsorti_lt);
+#undef gk_ckvsorti_lt
+}
+
+
+/*************************************************************************/
+/*! Sorts the n gk_ckv_t entries starting at base by decreasing key */
+/*************************************************************************/
+void gk_ckvsortd(size_t n, gk_ckv_t *base)
+{
+#define gk_ckvsortd_gt(x, y) ((x)->key > (y)->key)
+  GK_MKQSORT(gk_ckv_t, base, n, gk_ckvsortd_gt);
+#undef gk_ckvsortd_gt
+}
+
+
+/*************************************************************************/
+/*! Sorts the n gk_ikv_t entries starting at base by increasing key */
+/*************************************************************************/
+void gk_ikvsorti(size_t n, gk_ikv_t *base)
+{
+#define gk_ikvsorti_lt(x, y) ((x)->key < (y)->key)
+  GK_MKQSORT(gk_ikv_t, base, n, gk_ikvsorti_lt);
+#undef gk_ikvsorti_lt
+}
+
+
+/*************************************************************************/
+/*! Sorts the n gk_ikv_t entries starting at base by decreasing key */
+/*************************************************************************/
+void gk_ikvsortd(size_t n, gk_ikv_t *base)
+{
+#define gk_ikvsortd_gt(x, y) ((x)->key > (y)->key)
+  GK_MKQSORT(gk_ikv_t, base, n, gk_ikvsortd_gt);
+#undef gk_ikvsortd_gt
+}
+
+
+/*************************************************************************/
+/*! Sorts the n gk_i32kv_t entries starting at base by increasing key */
+/*************************************************************************/
+void gk_i32kvsorti(size_t n, gk_i32kv_t *base)
+{
+#define gk_i32kvsorti_lt(x, y) ((x)->key < (y)->key)
+  GK_MKQSORT(gk_i32kv_t, base, n, gk_i32kvsorti_lt);
+#undef gk_i32kvsorti_lt
+}
+
+
+/*************************************************************************/
+/*! Sorts the n gk_i32kv_t entries starting at base by decreasing key */
+/*************************************************************************/
+void gk_i32kvsortd(size_t n, gk_i32kv_t *base)
+{
+#define gk_i32kvsortd_gt(x, y) ((x)->key > (y)->key)
+  GK_MKQSORT(gk_i32kv_t, base, n, gk_i32kvsortd_gt);
+#undef gk_i32kvsortd_gt
+}
+
+
+/*************************************************************************/
+/*! Sorts the n gk_i64kv_t entries starting at base by increasing key */
+/*************************************************************************/
+void gk_i64kvsorti(size_t n, gk_i64kv_t *base)
+{
+#define gk_i64kvsorti_lt(x, y) ((x)->key < (y)->key)
+  GK_MKQSORT(gk_i64kv_t, base, n, gk_i64kvsorti_lt);
+#undef gk_i64kvsorti_lt
+}
+
+
+/*************************************************************************/
+/*! Sorts the n gk_i64kv_t entries starting at base by decreasing key */
+/*************************************************************************/
+void gk_i64kvsortd(size_t n, gk_i64kv_t *base)
+{
+#define gk_i64kvsortd_gt(x, y) ((x)->key > (y)->key)
+  GK_MKQSORT(gk_i64kv_t, base, n, gk_i64kvsortd_gt);
+#undef gk_i64kvsortd_gt
+}
+
+
+/*************************************************************************/
+/*! Sorts the n gk_zkv_t entries starting at base by increasing key */
+/*************************************************************************/
+void gk_zkvsorti(size_t n, gk_zkv_t *base)
+{
+#define gk_zkvsorti_lt(x, y) ((x)->key < (y)->key)
+  GK_MKQSORT(gk_zkv_t, base, n, gk_zkvsorti_lt);
+#undef gk_zkvsorti_lt
+}
+
+
+/*************************************************************************/
+/*! Sorts the n gk_zkv_t entries starting at base by decreasing key */
+/*************************************************************************/
+void gk_zkvsortd(size_t n, gk_zkv_t *base)
+{
+#define gk_zkvsortd_gt(x, y) ((x)->key > (y)->key)
+  GK_MKQSORT(gk_zkv_t, base, n, gk_zkvsortd_gt);
+#undef gk_zkvsortd_gt
+}
+
+
+/*************************************************************************/
+/*! Sorts the n gk_zukv_t entries starting at base by increasing key */
+/*************************************************************************/
+void gk_zukvsorti(size_t n, gk_zukv_t *base)
+{
+#define gk_zukvsorti_lt(x, y) ((x)->key < (y)->key)
+  GK_MKQSORT(gk_zukv_t, base, n, gk_zukvsorti_lt);
+#undef gk_zukvsorti_lt
+}
+
+
+/*************************************************************************/
+/*! Sorts the n gk_zukv_t entries starting at base by decreasing key */
+/*************************************************************************/
+void gk_zukvsortd(size_t n, gk_zukv_t *base)
+{
+#define gk_zukvsortd_gt(x, y) ((x)->key > (y)->key)
+  GK_MKQSORT(gk_zukv_t, base, n, gk_zukvsortd_gt);
+#undef gk_zukvsortd_gt
+}
+
+
+/*************************************************************************/
+/*! Sorts the n gk_fkv_t entries starting at base by increasing key */
+/*************************************************************************/
+void gk_fkvsorti(size_t n, gk_fkv_t *base)
+{
+#define gk_fkvsorti_lt(x, y) ((x)->key < (y)->key)
+  GK_MKQSORT(gk_fkv_t, base, n, gk_fkvsorti_lt);
+#undef gk_fkvsorti_lt
+}
+
+
+/*************************************************************************/
+/*! Sorts the n gk_fkv_t entries starting at base by decreasing key */
+/*************************************************************************/
+void gk_fkvsortd(size_t n, gk_fkv_t *base)
+{
+#define gk_fkvsortd_gt(x, y) ((x)->key > (y)->key)
+  GK_MKQSORT(gk_fkv_t, base, n, gk_fkvsortd_gt);
+#undef gk_fkvsortd_gt
+}
+
+
+/*************************************************************************/
+/*! Sorts the n gk_dkv_t entries starting at base by increasing key */
+/*************************************************************************/
+void gk_dkvsorti(size_t n, gk_dkv_t *base)
+{
+#define gk_dkvsorti_lt(x, y) ((x)->key < (y)->key)
+  GK_MKQSORT(gk_dkv_t, base, n, gk_dkvsorti_lt);
+#undef gk_dkvsorti_lt
+}
+
+
+/*************************************************************************/
+/*! Sorts the n gk_dkv_t entries starting at base by decreasing key */
+/*************************************************************************/
+void gk_dkvsortd(size_t n, gk_dkv_t *base)
+{
+#define gk_dkvsortd_gt(x, y) ((x)->key > (y)->key)
+  GK_MKQSORT(gk_dkv_t, base, n, gk_dkvsortd_gt);
+#undef gk_dkvsortd_gt
+}
+
+
+/*************************************************************************/
+/*! Sorts the n gk_skv_t entries of base by increasing key, per strcmp() */
+/*************************************************************************/
+void gk_skvsorti(size_t n, gk_skv_t *base)
+{
+#define gk_skvsorti_lt(x, y) (strcmp((x)->key, (y)->key) < 0)
+  GK_MKQSORT(gk_skv_t, base, n, gk_skvsorti_lt);
+#undef gk_skvsorti_lt
+}
+
+
+/*************************************************************************/
+/*! Sorts the n gk_skv_t entries of base by decreasing key, per strcmp() */
+/*************************************************************************/
+void gk_skvsortd(size_t n, gk_skv_t *base)
+{
+#define gk_skvsortd_gt(x, y) (strcmp((x)->key, (y)->key) > 0)
+  GK_MKQSORT(gk_skv_t, base, n, gk_skvsortd_gt);
+#undef gk_skvsortd_gt
+}
+
+
+/*************************************************************************/
+/*! Sorts the n gk_idxkv_t entries starting at base by increasing key */
+/*************************************************************************/
+void gk_idxkvsorti(size_t n, gk_idxkv_t *base)
+{
+#define gk_idxkvsorti_lt(x, y) ((x)->key < (y)->key)
+  GK_MKQSORT(gk_idxkv_t, base, n, gk_idxkvsorti_lt);
+#undef gk_idxkvsorti_lt
+}
+
+
+/*************************************************************************/
+/*! Sorts the n gk_idxkv_t entries starting at base by decreasing key */
+/*************************************************************************/
+void gk_idxkvsortd(size_t n, gk_idxkv_t *base)
+{
+#define gk_idxkvsortd_gt(x, y) ((x)->key > (y)->key)
+  GK_MKQSORT(gk_idxkv_t, base, n, gk_idxkvsortd_gt);
+#undef gk_idxkvsortd_gt
+}
diff --git a/string.c b/string.c
new file mode 100644
index 0000000..4a3fb14
--- /dev/null
+++ b/string.c
@@ -0,0 +1,530 @@
+/************************************************************************/
+/*! \file 
+
+\brief Functions for manipulating strings.
+
+Various functions for manipulating strings. Some of these functions 
+provide new functionality, whereas others are drop-in replacements
+of standard functions (but with enhanced functionality).
+
+\date Started 11/1/99
+\author George
+\version $Id: string.c 14330 2013-05-18 12:15:15Z karypis $
+*/
+/************************************************************************/
+
+/* the following is for strptime() */
+#define _XOPEN_SOURCE
+#include <time.h>
+#undef _XOPEN_SOURCE
+
+#include <GKlib.h>
+
+
+
+/************************************************************************/
+/*! \brief Translates or deletes selected characters of a string.
+ 
+Every character of \c str that appears in \c fromlist is replaced by the
+character found at the same position of \c tolist, i.e., each occurrence
+of <tt>fromlist[i]</tt> becomes <tt>tolist[i]</tt>. When \c tolist is too
+short to supply a counterpart, the character is removed instead. If a
+character is listed more than once in \c fromlist, its first position is
+the one that counts. The string is rewritten in place, in the spirit of
+Perl's \b tr// function.
+
+\param str is the string whose characters will be replaced.
+\param fromlist is the set of characters to be replaced.
+\param tolist is the set of replacement characters.
+\returns A pointer to \c str itself.
+*/
+/************************************************************************/
+char *gk_strchr_replace(char *str, char *fromlist, char *tolist)
+{
+  char *src, *dst, *hit;
+  size_t tolen, pos;
+
+  tolen = strlen(tolist);
+
+  /* src scans the original text; dst trails it and marks the end of the output */
+  for (src=dst=str; *src != '\0'; src++) {
+    hit = strchr(fromlist, *src);
+    if (hit == NULL) {          /* not listed: keep the character */
+      *dst++ = *src;
+      continue;
+    }
+
+    pos = (size_t)(hit - fromlist);
+    if (pos < tolen)            /* listed with a counterpart: translate it */
+      *dst++ = tolist[pos];     /* without one it is simply dropped */
+  }
+  *dst = '\0';
+
+  return str;
+}
+
+
+
+/************************************************************************/
+/*! \brief Regex-based search-and-replace function
+ 
+This function is a C implementation of Perl's <tt> s//</tt> regular-expression
+based substitution function.
+
+\param str is the input string on which the operation will be performed.
+\param pattern is the POSIX extended regular expression to be matched.
+\param replacement
+  is the replacement string, in which the possible captured pattern substrings
+  are referred to as $1, $2, ..., $9. The entire matched pattern is referred
+  to as $0. A backslash inserts the character that follows it literally.
+\param options is a string of option letters. Currently <tt>"i"</tt> (case
+  insensitive) and <tt>"g"</tt> (global substitution) are supported.
+\param new_str 
+  is a reference to a pointer that will store a pointer to the newly created 
+  string that results from the substitutions. This string is allocated via 
+  gk_malloc() and needs to be freed using gk_free(). The string is returned 
+  even if no substitutions were performed.
+\returns 1 + the number of substitutions performed (1 when nothing matched),
+  or 0 on error, in which case <tt>new_str</tt> holds an error message that
+  also needs to be freed.
+*/
+/************************************************************************/
+int gk_strstr_replace(char *str, char *pattern, char *replacement, char *options,
+      char **new_str)
+{
+  ssize_t i, len, rlen, nlen, offset, noffset, mlen;
+  int j, rc, flags, global, nmatches;
+  regex_t re;
+  regmatch_t matches[10];
+
+  /* Parse the options */
+  flags = REG_EXTENDED;
+  if (strchr(options, 'i') != NULL)
+    flags = flags | REG_ICASE;
+  global = (strchr(options, 'g') != NULL ? 1 : 0);
+
+  /* Compile the regex */
+  if ((rc = regcomp(&re, pattern, flags)) != 0) { 
+    len = regerror(rc, &re, NULL, 0);
+    *new_str = gk_cmalloc(len, "gk_strstr_replace: new_str");
+    regerror(rc, &re, *new_str, len);
+    return 0;
+  }
+
+  /* Prepare the output string */
+  len = strlen(str);
+  nlen = 2*len;
+  noffset = 0;
+  *new_str = gk_cmalloc(nlen+1, "gk_strstr_replace: new_str");
+
+  /* Get into the matching-replacing loop */
+  rlen = strlen(replacement);
+  offset = 0;
+  nmatches = 0;
+  do {
+    rc = regexec(&re, str+offset, 10, matches, 0);
+
+    if (rc == REG_ESPACE) {
+      gk_free((void **)new_str, LTERM);
+      *new_str = gk_strdup("regexec ran out of memory.");
+      regfree(&re);
+      return 0;
+    }
+    else if (rc == REG_NOMATCH) {
+      if (nlen-noffset < len-offset) {
+        nlen += (len-offset) - (nlen-noffset);
+        *new_str = (char *)gk_realloc(*new_str, (nlen+1)*sizeof(char), "gk_strstr_replace: new_str");
+      }
+      strcpy(*new_str+noffset, str+offset);
+      noffset += (len-offset);
+      break;
+    }
+    else { /* A match was found! */
+      nmatches++;
+
+      /* Copy the left unmatched portion of the string */
+      if (matches[0].rm_so > 0) {
+        if (nlen-noffset < matches[0].rm_so) {
+          nlen += matches[0].rm_so - (nlen-noffset);
+          *new_str = (char *)gk_realloc(*new_str, (nlen+1)*sizeof(char), "gk_strstr_replace: new_str");
+        }
+        strncpy(*new_str+noffset, str+offset, matches[0].rm_so);
+        noffset += matches[0].rm_so;
+      }
+
+      /* Go and append the replacement string */
+      for (i=0; i<rlen; i++) {
+        switch (replacement[i]) {
+          case '\\':
+            if (i+1 < rlen) {
+              if (nlen-noffset < 1) {
+                nlen += nlen + 1;
+                *new_str = (char *)gk_realloc(*new_str, (nlen+1)*sizeof(char), "gk_strstr_replace: new_str");
+              }
+              (*new_str)[noffset++] = replacement[++i];  /* index the string, not new_str */
+            }
+            else {
+              gk_free((void **)new_str, LTERM);
+              *new_str = gk_strdup("Error in replacement string. Missing character following '\\'.");
+              regfree(&re);
+              return 0;
+            }
+            break;
+
+          case '$':
+            if (i+1 < rlen) {
+              j = (int)(replacement[++i] - '0');
+              if (j < 0 || j > 9) {
+                gk_free((void **)new_str, LTERM);
+                *new_str = gk_strdup("Error in captured subexpression specification.");
+                regfree(&re);
+                return 0;
+              }
+              mlen = (matches[j].rm_so < 0 ? 0 : matches[j].rm_eo-matches[j].rm_so);  /* -1: unused group */
+              if (nlen-noffset < mlen) {
+                nlen += nlen + mlen;
+                *new_str = (char *)gk_realloc(*new_str, (nlen+1)*sizeof(char), "gk_strstr_replace: new_str");
+              }
+              if (mlen > 0)
+                strncpy(*new_str+noffset, str+offset+matches[j].rm_so, mlen);
+              noffset += mlen;
+            }
+            else {
+              gk_free((void **)new_str, LTERM);
+              *new_str = gk_strdup("Error in replacement string. Missing subexpression number folloing '$'.");
+              regfree(&re);
+              return 0;
+            }
+            break;
+
+          default:
+            if (nlen-noffset < 1) {
+              nlen += nlen + 1;
+              *new_str = (char *)gk_realloc(*new_str, (nlen+1)*sizeof(char), "gk_strstr_replace: new_str");
+            }
+            (*new_str)[noffset++] = replacement[i];
+        }
+      }
+
+      /* Update the offset of str for the next match */
+      offset += matches[0].rm_eo;
+
+      if (!global) {
+        /* Copy the right portion of the string if no 'g' option */
+        if (nlen-noffset < len-offset) {
+          nlen += (len-offset) - (nlen-noffset);
+          *new_str = (char *)gk_realloc(*new_str, (nlen+1)*sizeof(char), "gk_strstr_replace: new_str");
+        }
+        strcpy(*new_str+noffset, str+offset);
+        noffset += (len-offset);
+      }
+      else if (matches[0].rm_eo == 0) {
+        /* Nothing was consumed: copy one character across so the search advances */
+        if (offset >= len)
+          break;
+        if (nlen-noffset < 1) {
+          nlen += nlen + 1;
+          *new_str = (char *)gk_realloc(*new_str, (nlen+1)*sizeof(char), "gk_strstr_replace: new_str");
+        }
+        (*new_str)[noffset++] = str[offset++];
+      }
+    }
+  } while (global);
+
+  (*new_str)[noffset] = '\0';
+
+  regfree(&re);
+  return nmatches + 1;
+}
+
+
+
+/************************************************************************/
+/*! \brief Prunes characters from the end of the string.
+
+Starting from the end of \c str, this function drops every character that
+is listed in \c rmlist and stops at the first character (counting from
+the end) that is not listed.
+Typical uses are the removal of trailing spaces, newlines, etc.
+This is a destructive operation as it modifies the string.
+
+\param str is the string that will be trimmed.
+\param rmlist contains the set of characters that will be removed.
+\returns A pointer to \c str itself.
+\sa gk_strhprune()
+*/
+/*************************************************************************/
+char *gk_strtprune(char *str, char *rmlist)
+{
+  size_t keep;   /* number of leading characters that survive */
+
+  keep = strlen(str);
+
+  /* Walk back from the end for as long as the last surviving character
+     is one of those listed in rmlist. str[keep-1] is never the string
+     terminator here, so strchr() cannot report a match on the '\0'
+     that ends rmlist. */
+  while (keep > 0 && strchr(rmlist, str[keep-1]) != NULL)
+    keep--;
+
+  /* cut the string right after the last character that was kept */
+  str[keep] = '\0';
+
+  return str;
+}
+
+
+
+/************************************************************************/
+/*! \brief Prunes characters from the beginning of the string.
+
+Starting from the beginning of \c str, this function drops every character
+that is listed in \c rmlist and stops at the first character that is not
+listed.
+Typical uses are the removal of leading spaces, tabs, etc.
+This is a destructive operation as it modifies the string.
+
+\param str is the string that will be trimmed.
+\param rmlist contains the set of characters that will be removed.
+\returns A pointer to \c str itself.
+\sa gk_strtprune()
+*/
+/*************************************************************************/
+char *gk_strhprune(char *str, char *rmlist)
+{
+  char *src, *dst;
+
+  /* src: first character that is not listed in rmlist (strspn() measures
+     the leading run that consists only of rmlist characters) */
+  src = str + strspn(str, rmlist);
+
+  /* nothing to strip: leave the string untouched */
+  if (src == str)
+    return str;
+
+  /* Slide the remainder down to the start of the buffer. Copying forward
+     one character at a time is safe although the two regions overlap,
+     since dst always trails src. */
+  for (dst=str; *src != '\0'; src++, dst++)
+    *dst = *src;
+
+  /* terminate the shortened string */
+  *dst = '\0';
+
+  return str;
+}
+
+
+/************************************************************************/
+/*! \brief Converts a string to upper case.
+
+The string is converted in place, one byte at a time with toupper(); each
+byte is passed as an unsigned char, which is what toupper() requires.
+
+\param str is the string whose case will be changed.
+\returns A pointer to \c str itself.
+\sa gk_strtolower()
+*/
+/*************************************************************************/
+char *gk_strtoupper(char *str)
+{
+  size_t i;
+  for (i=0; str[i]!='\0'; i++)
+    str[i] = toupper((unsigned char)str[i]);
+  return str;
+}
+
+
+/************************************************************************/
+/*! \brief Converts a string to lower case.
+
+The string is converted in place, one byte at a time with tolower(); each
+byte is passed as an unsigned char, which is what tolower() requires.
+
+\param str is the string whose case will be changed.
+\returns A pointer to \c str itself.
+\sa gk_strtoupper()
+*/
+/*************************************************************************/
+char *gk_strtolower(char *str)
+{
+  size_t i;
+  for (i=0; str[i]!='\0'; i++)
+    str[i] = tolower((unsigned char)str[i]);
+  return str;
+}
+
+
+/************************************************************************/
+/*! \brief Duplicates a string
+
+This function is a replacement for C's standard <em>strdup()</em> function.
+The key differences between the two are that gk_strdup():
+  - uses the dynamic memory allocation routines of \e GKlib. 
+  - it correctly handles NULL input strings (NULL is returned for them).
+
+The string that is returned must be freed by gk_free().
+
+\param orgstr is the string that will be duplicated.
+\returns A pointer to the newly created string.
+\sa gk_free()
+*/
+/*************************************************************************/
+char *gk_strdup(char *orgstr)
+{
+  size_t len;   /* size_t: an int overflows for strings of INT_MAX bytes or more */
+  char *str=NULL;
+
+  if (orgstr != NULL) {
+    len = strlen(orgstr)+1;
+    str = gk_malloc(len*sizeof(char), "gk_strdup: str");
+    strcpy(str, orgstr);
+  }
+
+  return str;
+}
+
+
+/************************************************************************/
+/*! \brief Case insensitive string comparison.
+
+This function compares two strings for equality by ignoring the case of the
+strings. 
+
+\warning This function is \b not equivalent to a case-insensitive 
+         <em>strcmp()</em> function, as it does not return ordering 
+         information.
+
+\todo Remove the above warning.
+
+\param s1 is the first string to be compared.
+\param s2 is the second string to be compared.
+\retval 1 if the strings are identical,
+\retval 0 otherwise.
+*/
+/*************************************************************************/
+int gk_strcasecmp(char *s1, char *s2)
+{
+  size_t i;
+
+  /* Compare position by position; the loop stops at the first terminator */
+  for (i=0; s1[i] != '\0' && s2[i] != '\0'; i++) {
+    /* tolower() needs unsigned char values; a plain char may be negative */
+    if (tolower((unsigned char)s1[i]) != tolower((unsigned char)s2[i]))
+      return 0;
+  }
+
+  /* identical only if both strings ended at the same position */
+  return (s1[i] == s2[i] ? 1 : 0);
+}
+
+
+
+/************************************************************************/
+/*! \brief Compare two strings in reverse order
+
+This function is similar to strcmp but it performs the comparison as
+if the two strings were reversed.
+
+\param s1 is the first string to be compared.
+\param s2 is the second string to be compared.
+\returns <0, 0, or >0 if the reversed s1 is <, == or > the reversed s2.
+*/
+/*************************************************************************/
+int gk_strrcmp(char *s1, char *s2)
+{
+  size_t n1 = strlen(s1);   /* characters of s1 not yet compared */
+  size_t n2 = strlen(s2);   /* characters of s2 not yet compared */
+
+  /* walk both strings backwards, starting from their last characters */
+  while ((n1 > 0) && (n2 > 0)) {
+    n1--;
+    n2--;
+    if (s1[n1] != s2[n2])
+      return (s1[n1] - s2[n2]);
+  }
+
+  /* one string is a suffix of the other: the shorter one sorts first */
+  if (n1 < n2)
+    return -1;
+  if (n1 > n2)
+    return 1;
+  return 0;
+}
+
+
+
+/************************************************************************/
+/*! \brief Converts a time_t time into a string 
+
+This function takes a time_t-specified time and returns a string-formatted
+representation of the corresponding local time. The format of the string is
+<em>mm/dd/yyyy hh:mm:ss</em>, in which the hours are in military time.
+
+\param time is the time to be converted.
+\return It returns a pointer to a statically allocated string that is 
+        over-written in successive calls of this function. If the 
+        conversion failed, it returns NULL.
+
+*/
+/*************************************************************************/
+char *gk_time2str(time_t time)
+{
+  static char datestr[128];
+  struct tm *tm;
+
+  /* localtime() returns NULL for a time that it cannot represent */
+  if ((tm = localtime(&time)) == NULL)
+    return NULL;
+  if (strftime(datestr, sizeof(datestr), "%m/%d/%Y %H:%M:%S", tm) == 0)
+    return NULL;
+  return datestr;
+}
+
+
+
+#if !defined(WIN32) && !defined(__MINGW32__)
+/************************************************************************/
+/*! \brief Converts a date/time string into its equivalent time_t value
+
+This function parses a local date/time with strptime() and converts it
+with mktime(), which is left to work out whether DST applies. The format
+that gk_str2time() understands is
+<em>mm/dd/yyyy hh:mm:ss</em>, in which the hours are in military time.
+
+\param str is the date/time string to be converted.
+\return The converted time; -1 if the string could not be parsed; 0 if
+        mktime() produced a negative value.
+*/
+/*************************************************************************/
+time_t gk_str2time(char *str)
+{
+  struct tm time;
+  time_t rtime;
+
+  memset(&time, '\0', sizeof(time));
+  time.tm_isdst = -1;  /* 0 would force standard time and shift DST dates by an hour */
+  if (strptime(str, "%m/%d/%Y %H:%M:%S", &time) == NULL)
+    return -1;
+
+  rtime = mktime(&time);
+  return (rtime < 0 ? 0 : rtime);
+}
+#endif
+
+
+/*************************************************************************
+* Looks up key (ignoring case) in strmap, an array whose last entry has a
+* NULL name, and returns the id of the first match, or -1 if none matches
+**************************************************************************/
+int gk_GetStringID(gk_StringMap_t *strmap, char *key)
+{
+  gk_StringMap_t *entry;
+
+  for (entry=strmap; entry->name != NULL; entry++) {
+    if (gk_strcasecmp(key, entry->name) != 0)
+      return entry->id;
+  }
+
+  return -1;
+}
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
new file mode 100644
index 0000000..8584820
--- /dev/null
+++ b/test/CMakeLists.txt
@@ -0,0 +1,19 @@
+# Build the test/utility programs; each one is a single source file.
+add_executable(strings strings.c)
+add_executable(gksort gksort.c)
+add_executable(fis fis.c)
+add_executable(gkrw rw.c)
+add_executable(gkgraph gkgraph.c)
+add_executable(csrcnv csrcnv.c)
+add_executable(grKx grKx.c)
+add_executable(m2mnbrs m2mnbrs.c)
+add_executable(cmpnbrs cmpnbrs.c)
+add_executable(splatt2svd splatt2svd.c)
+add_executable(gkuniq gkuniq.c)
+
+foreach(prog strings gksort fis gkrw gkgraph csrcnv grKx m2mnbrs cmpnbrs splatt2svd gkuniq)
+  target_link_libraries(${prog} PRIVATE GKlib)
+endforeach()
+
+# Install a subset of them
+install(TARGETS csrcnv RUNTIME DESTINATION bin)
diff --git a/test/cmpnbrs.c b/test/cmpnbrs.c
new file mode 100644
index 0000000..6e3ace8
--- /dev/null
+++ b/test/cmpnbrs.c
@@ -0,0 +1,301 @@
+/*!
+\file  
+\brief It takes as input two CSR matrices A and B and computes how
+       similar AA' and A'A are to BB' and B'B, respectively in terms
+       of the cosine similarity of the corresponding rows.
+
+\date 11/09/2015
+\author George
+\version \verbatim $Id: m2mnbrs.c 17699 2014-09-27 18:05:31Z karypis $ \endverbatim
+*/
+
+#include <GKlib.h>
+
+/*************************************************************************/
+/*! Data structures for the code */
+/*************************************************************************/
+typedef struct {
+  int simtype;             /*!< The similarity type to use (an id from simtype_options) */
+  int verbosity;           /*!< The reporting verbosity level */
+
+  char *afile;             /*!< The file storing the CSR matrix A */
+  char *bfile;             /*!< The file storing the CSR matrix B */
+
+  /* wall-clock timer that main() runs around the two similarity computations */
+  double timer_global;
+} params_t;
+
+
+/*************************************************************************/
+/*! Constants */
+/*************************************************************************/
+/* Program version, printed in the banner by main() */
+#define VER_MAJOR           0
+#define VER_MINOR           1
+#define VER_SUBMINOR        0
+
+/* Command-line option codes: the values gk_getopt_long_only() returns for each long option */
+#define CMD_SIMTYPE         10
+#define CMD_VERBOSITY       70
+#define CMD_HELP            100
+
+/* The text labels for the different simtypes; main() indexes this by params->simtype (NOTE(review): assumes GK_CSR_DOTP/COS/JAC are 1/2/3 -- confirm) */
+static char simtypenames[][10] = {"", "dotp", "cos", "jac", ""};
+
+
+/*************************************************************************/
+/*! Local variables: long-option table and simtype name-to-id map, each ending in a zero/NULL entry */
+/*************************************************************************/
+static struct gk_option long_options[] = {
+  {"simtype",           1,      0,      CMD_SIMTYPE},
+  {"verbosity",         1,      0,      CMD_VERBOSITY},
+
+  {"help",              0,      0,      CMD_HELP},
+  {0,                   0,      0,      0}
+};
+
+static gk_StringMap_t simtype_options[] = {
+  {"dotp",               GK_CSR_DOTP},
+  {"cos",                GK_CSR_COS},
+  {"jac",                GK_CSR_JAC},
+  {NULL,                 0}
+};
+
+
+/*-------------------------------------------------------------------
+ * Mini help: printed line by line for -help; the empty string ends the list
+ *-------------------------------------------------------------------*/
+static char helpstr[][100] =
+{
+" ",
+"Usage: cmpnbrs [options] afile bfile",
+" ",
+" Options",
+"  -simtype=string",
+"     Specifies the type of similarity to use. Possible values are:",
+"       dotp  - Dot-product similarity [default]",
+"       cos   - Cosine similarity",
+"       jac   - Jaccard similarity",
+" ",
+"  -verbosity=int",
+"     Specifies the level of debugging information to be displayed.",
+"     Default value is 0.",
+" ",
+"  -help",
+"     Prints this message.",
+""
+};
+
+
+
+/*************************************************************************/
+/*! Function prototypes */
+/*************************************************************************/
+params_t *parse_cmdline(int argc, char *argv[]);
+double ComputeNeighborhoodSimilarity(params_t *params, gk_csr_t *amat, gk_csr_t *bmat);
+
+
+/*************************************************************************/
+/*! This is the entry point of the command-line argument parser */
+/*************************************************************************/
+params_t *parse_cmdline(int argc, char *argv[])
+{
+  params_t *params;
+  int opt, optidx, line;
+
+  /* allocate the parameter block and load the defaults */
+  params = (params_t *)gk_malloc(sizeof(params_t), "parse_cmdline: params");
+  params->afile     = NULL;
+  params->bfile     = NULL;
+  params->verbosity = -1;
+  params->simtype   = GK_CSR_DOTP;
+
+  /* consume the options; gk_getopt_long_only() returns -1 once none are left */
+  for (;;) {
+    opt = gk_getopt_long_only(argc, argv, "", long_options, &optidx);
+    if (opt == -1)
+      break;
+
+    if (opt == CMD_SIMTYPE) {
+      /* map the textual simtype to its id; an unknown name is fatal */
+      if (gk_optarg) {
+        params->simtype = gk_GetStringID(simtype_options, gk_optarg);
+        if (params->simtype == -1)
+          errexit("Invalid simtype of %s.\n", gk_optarg);
+      }
+    }
+    else if (opt == CMD_VERBOSITY) {
+      if (gk_optarg)
+        params->verbosity = atoi(gk_optarg);
+    }
+    else if (opt == CMD_HELP) {
+      /* helpstr is terminated by an empty string */
+      for (line=0; helpstr[line][0] != '\0'; line++)
+        printf("%s\n", helpstr[line]);
+      exit(EXIT_SUCCESS);
+    }
+    else {
+      printf("Illegal command-line option(s)\nUse %s -help for a summary of the options.\n", argv[0]);
+      exit(EXIT_FAILURE);
+    }
+  }
+
+  /* exactly two positional arguments must remain: afile and bfile */
+  if (argc-gk_optind != 2) {
+    printf("Missing input file info.\n  Use %s -help for a summary of the options.\n", argv[0]);
+    exit(EXIT_FAILURE);
+  }
+
+  params->afile = gk_strdup(argv[gk_optind++]);
+  params->bfile = gk_strdup(argv[gk_optind++]);
+
+  /* both input files must exist */
+  if (!gk_fexists(params->afile))
+    errexit("input file %s does not exist.\n", params->afile);
+  if (!gk_fexists(params->bfile))
+    errexit("input file %s does not exist.\n", params->bfile);
+  return params;
+}
+
+
+/*************************************************************************/
+/*! This is the entry point of the program */
+/**************************************************************************/
+int main(int argc, char *argv[])
+{
+  params_t *params;
+  gk_csr_t *amat, *bmat, *amatt, *bmatt;
+  int rc = EXIT_SUCCESS;
+
+  params = parse_cmdline(argc, argv);
+  /* both inputs are read as GK_CSR_FMT_CSR files */
+  amat = gk_csr_Read(params->afile, GK_CSR_FMT_CSR, 1, 0);
+  bmat = gk_csr_Read(params->bfile, GK_CSR_FMT_CSR, 1, 0);
+
+  /* make the matrices of similar dimensions (if necessary): the row counts must already agree, the column counts are raised to the larger of the two */
+  GKASSERT(amat->nrows == bmat->nrows);
+  amat->ncols = gk_max(amat->ncols, bmat->ncols);
+  bmat->ncols = amat->ncols;
+
+  /* create the transpose matrices (used for the A'A vs B'B comparison) */
+  amatt = gk_csr_Transpose(amat);
+  bmatt = gk_csr_Transpose(bmat);
+
+  printf("********************************************************************************\n");
+  printf("cmpnbrs (%d.%d.%d) Copyright 2015, GK.\n", VER_MAJOR, VER_MINOR, VER_SUBMINOR);
+  printf("  simtype=%s\n",
+      simtypenames[params->simtype]);
+  printf("  afile=%s, nrows=%d, ncols=%d, nnz=%zd\n",
+      params->afile, amat->nrows, amat->ncols, amat->rowptr[amat->nrows]);
+  printf("  bfile=%s, nrows=%d, ncols=%d, nnz=%zd\n",
+      params->bfile, bmat->nrows, bmat->ncols, bmat->rowptr[bmat->nrows]);
+
+  gk_clearwctimer(params->timer_global);
+  gk_startwctimer(params->timer_global);
+  /* each call modifies its two matrices in place (cosine normalization, index, squared norms) */
+  printf("SIM(AA', BB'): %.5lf\t", ComputeNeighborhoodSimilarity(params, amat, bmat));
+  printf("SIM(A'A, B'B): %.5lf\n", ComputeNeighborhoodSimilarity(params, amatt, bmatt));
+
+  gk_stopwctimer(params->timer_global);
+
+  printf("    wclock: %.2lfs\n", gk_getwctimer(params->timer_global));
+  printf("********************************************************************************\n");
+
+  gk_csr_Free(&amat);
+  gk_csr_Free(&bmat);
+  gk_csr_Free(&amatt);
+  gk_csr_Free(&bmatt);
+
+  exit(rc);
+}
+
+
+/*************************************************************************/
+/*! Compares the neighbors of AA' vs BB'. Returns the mean, over the rows that are non-empty in both matrices, of the cosine between the row's dense hit vectors in
+    amat and bmat (0.0 when no row qualifies). amat and bmat are modified in place (row normalization for cosine, column index, squared norms). */
+double ComputeNeighborhoodSimilarity(params_t *params, gk_csr_t *amat, 
+           gk_csr_t *bmat)
+{
+  int iR, iH, nahits, nbhits, ncmps;
+  int32_t *marker;
+  gk_fkv_t *ahits, *bhits, *cand;
+  double tabsim, abdot, anorm2, bnorm2, *avec, *bvec;
+
+  /* if cosine, make rows unit length */
+  if (params->simtype == GK_CSR_COS) {
+    gk_csr_Normalize(amat, GK_CSR_ROW, 2);
+    gk_csr_Normalize(bmat, GK_CSR_ROW, 2);
+  }
+
+  /* create the inverted index */
+  gk_csr_CreateIndex(amat, GK_CSR_COL);
+  gk_csr_CreateIndex(bmat, GK_CSR_COL);
+
+  /* compute the row squared norms */
+  gk_csr_ComputeSquaredNorms(amat, GK_CSR_ROW);
+  gk_csr_ComputeSquaredNorms(bmat, GK_CSR_ROW);
+
+  /* allocate memory for the necessary working arrays; marker and cand are
+     shared by both gk_csr_GetSimilarRows() calls and sized by amat->nrows */
+  ahits  = gk_fkvmalloc(amat->nrows, "ComputeNeighborhoodSimilarity: ahits");
+  bhits  = gk_fkvmalloc(bmat->nrows, "ComputeNeighborhoodSimilarity: bhits");
+  marker = gk_i32smalloc(amat->nrows, -1, "ComputeNeighborhoodSimilarity: marker");
+  cand   = gk_fkvmalloc(amat->nrows, "ComputeNeighborhoodSimilarity: cand");
+  avec   = gk_dsmalloc(amat->nrows, 0.0, "ComputeNeighborhoodSimilarity: avec");
+  bvec   = gk_dsmalloc(bmat->nrows, 0.0, "ComputeNeighborhoodSimilarity: bvec");
+
+  /* for each row that is non-empty in both matrices, find its similar rows
+     in A and in B, lay the two hit lists out as dense vectors, and
+     accumulate the cosine similarity between those vectors. */
+  tabsim = 0.0;
+  ncmps  = 0;
+  for (iR=0; iR<amat->nrows; iR++) {
+    if (params->verbosity > 1)
+      printf("Working on row %7d\n", iR);
+
+    if (amat->rowptr[iR+1]-amat->rowptr[iR] == 0 ||
+        bmat->rowptr[iR+1]-bmat->rowptr[iR] == 0)
+      continue;
+
+    nahits = gk_csr_GetSimilarRows(amat, 
+                 amat->rowptr[iR+1]-amat->rowptr[iR], 
+                 amat->rowind+amat->rowptr[iR], 
+                 amat->rowval+amat->rowptr[iR], 
+                 params->simtype, amat->nrows, 0.0,
+                 ahits, marker, cand);
+
+    nbhits = gk_csr_GetSimilarRows(bmat, 
+                 bmat->rowptr[iR+1]-bmat->rowptr[iR], 
+                 bmat->rowind+bmat->rowptr[iR], 
+                 bmat->rowval+bmat->rowptr[iR], 
+                 params->simtype, bmat->nrows, 0.0,
+                 bhits, marker, cand);
+
+    if (params->verbosity > 0)
+      printf("Row %7d %7d %7d %8zd %8zd\n", iR, nahits, nbhits, 
+          amat->rowptr[iR+1]-amat->rowptr[iR], bmat->rowptr[iR+1]-bmat->rowptr[iR]);
+    /* scatter each hit's key into the dense-vector slot selected by the hit's val */
+    for (iH=0; iH<nahits; iH++) 
+      avec[ahits[iH].val] = ahits[iH].key;
+    for (iH=0; iH<nbhits; iH++) 
+      bvec[bhits[iH].val] = bhits[iH].key;
+
+    for (abdot=anorm2=bnorm2=0.0, iH=0; iH<amat->nrows; iH++) {
+      abdot  += avec[iH]*bvec[iH];
+      anorm2 += avec[iH]*avec[iH];
+      bnorm2 += bvec[iH]*bvec[iH];
+    }
+    tabsim += (abdot > 0 ? abdot/sqrt(anorm2*bnorm2) : 0.0);
+    ncmps++;
+    /* reset only the touched slots so avec/bvec are all-zero for the next row */
+    for (iH=0; iH<nahits; iH++) 
+      avec[ahits[iH].val] = 0.0;
+    for (iH=0; iH<nbhits; iH++) 
+      bvec[bhits[iH].val] = 0.0;
+  }
+
+  gk_free((void **)&ahits, &bhits, &marker, &cand, &avec, &bvec, LTERM);
+  /* no comparable rows: avoid 0.0/0 (NaN) and report zero similarity */
+  return (ncmps > 0 ? tabsim/ncmps : 0.0);
+}
+
diff --git a/test/csrcnv.c b/test/csrcnv.c
new file mode 100644
index 0000000..aef808e
--- /dev/null
+++ b/test/csrcnv.c
@@ -0,0 +1,397 @@
+/*!
+\file  
+\brief A simple program to convert between different matrix formats that are supported
+       by the gk_csr_Read/gk_csr_Write functions.
+
+\date 5/30/2013
+\author George
+\version \verbatim $Id: csrcnv.c 15314 2013-10-05 16:50:50Z karypis $ \endverbatim
+*/
+
+#include <GKlib.h>
+
+/*************************************************************************/
+/*! Data structures for the code */
+/*************************************************************************/
+typedef struct {
+  int inf, outf;    /* input/output format */
+  int numbering;    /* input numbering (output when applicable) */
+  int readvals;     /* input values (output when applicable) */
+  int writevals;    /* output values */
+  int rshuf, cshuf; /* random shuffle of rows/columns */
+  int symmetric;    /* a symmetric shuffle */
+  int mincolfreq;   /* column pruning: minimum column frequency (-1: not set) */
+  int maxcolfreq;   /* column pruning: maximum column frequency (-1: not set) */
+  int minrowfreq;   /* row pruning: minimum row frequency (-1: not set) */
+  int maxrowfreq;   /* row pruning: maximum row frequency (-1: not set) */
+  float rownrmfltr; /* row-lowfilter threshold (negative: filter disabled) */
+  int compactcols;  /* if to renumber columns to eliminate empty ones */
+  int transpose;    /* transpose the output matrix */
+  char *srenumber;  /* the iperm file for the symmetric renumbering */
+  char *infile;     /* input file */
+  char *outfile;    /* output file */
+} params_t;
+
+
+/*************************************************************************/
+/*! Constants: option codes returned by gk_getopt_long_only() and switched on in parse_cmdline() */
+/*************************************************************************/
+#define CMD_NUMONE        1
+#define CMD_NOREADVALS    2
+#define CMD_NOWRITEVALS   3
+#define CMD_RSHUF         4
+#define CMD_CSHUF         5
+#define CMD_SYMMETRIC     6
+#define CMD_MINCOLFREQ    7
+#define CMD_MAXCOLFREQ    8
+#define CMD_MINROWFREQ    9
+#define CMD_MAXROWFREQ    10
+#define CMD_ROWNRMFLTR    11
+#define CMD_COMPACTCOLS   12
+#define CMD_TRANSPOSE     13
+#define CMD_SRENUMBER     14
+#define CMD_HELP          100
+
+
+/*************************************************************************/
+/*! Local variables: long-option table; the second field is 1 for the options whose handlers read gk_optarg; an all-zero entry ends the table */
+/*************************************************************************/
+static struct gk_option long_options[] = {
+  {"numone",      0,      0,      CMD_NUMONE},
+  {"noreadvals",  0,      0,      CMD_NOREADVALS},
+  {"nowritevals", 0,      0,      CMD_NOWRITEVALS},
+  {"rshuf",       0,      0,      CMD_RSHUF},
+  {"cshuf",       0,      0,      CMD_CSHUF},
+  {"symmetric",   0,      0,      CMD_SYMMETRIC},
+  {"mincolfreq",  1,      0,      CMD_MINCOLFREQ},
+  {"maxcolfreq",  1,      0,      CMD_MAXCOLFREQ},
+  {"minrowfreq",  1,      0,      CMD_MINROWFREQ},
+  {"maxrowfreq",  1,      0,      CMD_MAXROWFREQ},
+  {"rownrmfltr",  1,      0,      CMD_ROWNRMFLTR},
+  {"compactcols", 0,      0,      CMD_COMPACTCOLS},
+  {"transpose",   0,      0,      CMD_TRANSPOSE},
+  {"srenumber",   1,      0,      CMD_SRENUMBER},
+  {"help",        0,      0,      CMD_HELP},
+  {0,             0,      0,      0}
+};
+
+
+/*-------------------------------------------------------------------*/
+/* Mini help: printed line by line for -help; the empty string ends the list */
+/*-------------------------------------------------------------------*/
+static char helpstr[][100] = {
+" ",
+"Usage: csrcnv [options] <infile> <inf> <outfile> <outf>",
+" ",
+" Required parameters",
+"  infile, outfile",
+"     The name of the input/output CSR file.",
+" ",
+"  inf/outf",
+"     The format of the input/output file.",
+"     Supported values are:",
+"        1  GK_CSR_FMT_CLUTO",
+"        2  GK_CSR_FMT_CSR",
+"        3  GK_CSR_FMT_METIS",
+"        4  GK_CSR_FMT_BINROW",
+"        6  GK_CSR_FMT_IJV",
+"        7  GK_CSR_FMT_BIJV",
+" ",
+" Optional parameters",
+"  -numone",
+"     Specifies that the numbering of the input file starts from 1. ",
+"     It only applies to CSR/IJV formats.",
+" ",
+"  -nowritevals",
+"     Specifies that no values will be output.",
+" ",
+"  -noreadvals",
+"     Specifies that the values will not be read when applicable.",
+" ",
+"  -rshuf",
+"     Specifies that the rows will be randomly shuffled prior to output.",
+" ",
+"  -cshuf",
+"     Specifies that the columns will be randomly shuffled prior to output.",
+" ",
+"  -symmetric",
+"     Specifies that the row+column shuffling will be symmetric.",
+" ",
+"  -mincolfreq=int",
+"     Used to prune infrequent columns.",
+" ",
+"  -maxcolfreq=int",
+"     Used to prune frequent columns.",
+" ",
+"  -minrowfreq=int",
+"     Used to prune infrequent rows.",
+" ",
+"  -maxrowfreq=int",
+"     Used to prune frequent rows.",
+" ",
+"  -rownrmfltr=float",
+"     The parameter to use for the row-wise low filter.",
+" ",
+"  -compactcols",
+"     Specifies if empty columns will be removed and the columns renumbered.",
+" ",
+"  -transpose",
+"     Specifies that the transposed matrix will be written.",
+" ",
+"  -srenumber=iperm-file",
+"     Performs a symmetric renumbering based on the provided iperm file.",
+" ",
+"  -help",
+"     Prints this message.",
+""
+};
+
+static char shorthelpstr[][100] = {  /* short usage text printed on a wrong argument count; "" ends it */
+" ",
+"   Usage: csrcnv [options] <infile> <inf> <outfile> <outf>",
+"          use 'csrcnv -help' for a summary of the options.",
+""
+};
+ 
+
+/*************************************************************************/
+/*! This is the entry point of the command-line argument parser */
+/*************************************************************************/
+params_t *parse_cmdline(int argc, char *argv[])
+{
+  int i;
+  int c, option_index;
+  params_t *params;
+
+  params = (params_t *)gk_malloc(sizeof(params_t), "parse_cmdline: params");
+
+  /* initialize the params data structure */
+  params->numbering = 0;
+  params->readvals  = 1;
+  params->writevals = 1;
+  params->rshuf     = 0;
+  params->cshuf     = 0;
+  params->symmetric = 0;
+  params->transpose = 0;
+  params->srenumber = NULL;
+
+  params->mincolfreq  = -1;
+  params->minrowfreq  = -1;
+  params->maxcolfreq  = -1;
+  params->maxrowfreq  = -1;
+  params->rownrmfltr  = -1;
+  params->compactcols = 0;
+
+  params->inf       = -1;
+  params->outf      = -1;
+  params->infile    = NULL;
+  params->outfile   = NULL;
+
+  /* Parse the command line arguments. -help exits with EXIT_SUCCESS; every
+     usage error (unknown option, wrong argument count) exits with EXIT_FAILURE */
+  while ((c = gk_getopt_long_only(argc, argv, "", long_options, &option_index)) != -1) {
+    switch (c) {
+      case CMD_NUMONE:
+        params->numbering = 1;
+        break;
+      case CMD_NOREADVALS:
+        params->readvals = 0;
+        break;
+      case CMD_NOWRITEVALS:
+        params->writevals = 0;
+        break;
+      case CMD_RSHUF:
+        params->rshuf = 1;
+        break;
+      case CMD_CSHUF:
+        params->cshuf = 1;
+        break;
+      case CMD_SYMMETRIC:
+        params->symmetric = 1;
+        break;
+      case CMD_TRANSPOSE:
+        params->transpose = 1;
+        break;
+
+      /* pruning/filter thresholds; -1 (the default) means "not requested" */
+      case CMD_MINCOLFREQ:
+        if (gk_optarg) params->mincolfreq = atoi(gk_optarg);
+        break;
+      case CMD_MINROWFREQ:
+        if (gk_optarg) params->minrowfreq = atoi(gk_optarg);
+        break;
+      case CMD_MAXCOLFREQ:
+        if (gk_optarg) params->maxcolfreq = atoi(gk_optarg);
+        break;
+      case CMD_MAXROWFREQ:
+        if (gk_optarg) params->maxrowfreq = atoi(gk_optarg);
+        break;
+      case CMD_ROWNRMFLTR:
+        if (gk_optarg) params->rownrmfltr = atof(gk_optarg);
+        break;
+      case CMD_COMPACTCOLS:
+        params->compactcols = 1;
+        break;
+
+      case CMD_SRENUMBER:
+        if (gk_optarg) {
+          params->srenumber = gk_strdup(gk_optarg);
+          if (!gk_fexists(params->srenumber))
+            errexit("srenumber file %s does not exist.\n", params->srenumber);
+        }
+        break;
+
+      case CMD_HELP:
+        for (i=0; strlen(helpstr[i]) > 0; i++)
+          printf("%s\n", helpstr[i]);
+        exit(EXIT_SUCCESS);
+        break;
+      case '?':
+      default:
+        printf("Illegal command-line option(s)\nUse %s -help for a summary of the options.\n", argv[0]);
+        exit(EXIT_FAILURE);
+    }
+  }
+
+  if (argc-gk_optind != 4) {
+    printf("Unrecognized parameters.");
+    for (i=0; strlen(shorthelpstr[i]) > 0; i++)
+      printf("%s\n", shorthelpstr[i]);
+    exit(EXIT_FAILURE);
+  }
+
+  params->infile  = gk_strdup(argv[gk_optind++]);
+  params->inf     = atoi(argv[gk_optind++]);
+  params->outfile = gk_strdup(argv[gk_optind++]);
+  params->outf    = atoi(argv[gk_optind++]);
+
+  if (!gk_fexists(params->infile))
+    errexit("input file %s does not exist.\n", params->infile);
+
+  return params;
+}
+
+
+/*************************************************************************/
+/*! the entry point */
+/**************************************************************************/
+int main(int argc, char *argv[])
+{
+  int what;
+  params_t *params;
+  gk_csr_t *mat, *mat1, *smat;
+
+  /* get command-line options */
+  params = parse_cmdline(argc, argv);
+
+  /* read the data */
+  mat = gk_csr_Read(params->infile, params->inf, params->readvals, params->numbering);
+
+  /* column pruning: an unset bound (-1) defaults to 0 / mat->nrows */
+  if (params->mincolfreq != -1 || params->maxcolfreq != -1) {
+    params->mincolfreq = (params->mincolfreq == -1 ? 0 : params->mincolfreq);
+    params->maxcolfreq = (params->maxcolfreq == -1 ? mat->nrows : params->maxcolfreq);
+
+    printf("Column prune: %d %d; nnz: %zd => ", 
+        params->mincolfreq, params->maxcolfreq, mat->rowptr[mat->nrows]);
+    mat1 = gk_csr_Prune(mat, GK_CSR_COL, params->mincolfreq, params->maxcolfreq);
+    gk_csr_Free(&mat);
+    mat = mat1;
+    mat1 = NULL;
+
+    printf("%zd\n", mat->rowptr[mat->nrows]);
+  }
+  /* row pruning: an unset bound (-1) defaults to 0 / mat->ncols */
+  if (params->minrowfreq != -1 || params->maxrowfreq != -1) {
+    params->minrowfreq = (params->minrowfreq == -1 ? 0 : params->minrowfreq);
+    params->maxrowfreq = (params->maxrowfreq == -1 ? mat->ncols : params->maxrowfreq);
+
+    printf("Row prune: %d %d; nnz: %zd => ", 
+        params->minrowfreq, params->maxrowfreq, mat->rowptr[mat->nrows]);
+    mat1 = gk_csr_Prune(mat, GK_CSR_ROW, params->minrowfreq, params->maxrowfreq);
+    gk_csr_Free(&mat);
+    mat = mat1;
+    mat1 = NULL;
+
+    printf("%zd\n", mat->rowptr[mat->nrows]);
+  }
+
+  if (params->rownrmfltr >= 0.0) {
+    /* apply the row-wise low filter with the requested threshold and then
+       normalize the rows of the filtered matrix */
+
+    printf("Row low filter: %f; nnz: %zd => ", params->rownrmfltr, mat->rowptr[mat->nrows]);
+    mat1 = gk_csr_LowFilter(mat, GK_CSR_ROW, 2, params->rownrmfltr);
+    gk_csr_Normalize(mat1, GK_CSR_ROW, 2);
+
+    gk_csr_Free(&mat);
+    mat = mat1;
+    mat1 = NULL;
+
+    printf("%zd\n", mat->rowptr[mat->nrows]);
+  }
+
+  if (params->compactcols) {
+    printf("Compacting columns: %d => ", mat->ncols);
+    gk_csr_CompactColumns(mat);
+    printf("%d\n", mat->ncols);
+  }
+
+  /* optional random shuffle of the rows, the columns, or both */
+  if (params->rshuf || params->cshuf) {
+    if (params->rshuf && params->cshuf)
+      what = GK_CSR_ROWCOL;
+    else if (params->rshuf)
+      what = GK_CSR_ROW;
+    else
+      what = GK_CSR_COL;
+
+    smat = gk_csr_Shuffle(mat, what, params->symmetric);
+    gk_csr_Free(&mat);
+    mat = smat;
+  }
+
+  /* optional symmetric renumbering driven by the iperm file */
+  if (params->srenumber) {
+    size_t i, nlines;
+    int32_t *iperm, imin, imax;
+
+    iperm = gk_i32readfile(params->srenumber, &nlines);
+    if (nlines != mat->nrows && nlines != mat->ncols)
+      errexit("The nlines=%zu of srenumber file does not match nrows: %d, ncols: %d\n", nlines, mat->nrows, mat->ncols);
+
+    /* a usable iperm is either 0-based [0, nlines-1] or 1-based [1, nlines] */
+    imin = gk_i32min(nlines, iperm, 1);
+    imax = gk_i32max(nlines, iperm, 1);
+    if (imin < 0 || (size_t)imax > nlines || (imin == 0 && (size_t)imax == nlines))
+      errexit("The srenumber iperm seems to be wrong.\n");
+
+    if ((size_t)imax == nlines) { /* 1-based: need to renumber to 0-based */
+      for (i=0; i<nlines; i++)
+        iperm[i]--;
+    }
+
+    smat = gk_csr_ReorderSymmetric(mat, iperm, NULL);
+    gk_csr_Free(&mat);
+    mat = smat;
+    gk_free((void **)&iperm, LTERM);
+  }
+
+  if (params->writevals && mat->rowval == NULL) 
+    mat->rowval = gk_fsmalloc(mat->rowptr[mat->nrows], 1.0, "mat->rowval");
+
+  if (params->transpose) {
+    mat1 = gk_csr_Transpose(mat);
+    gk_csr_Free(&mat);
+    mat = mat1;
+    mat1 = NULL;
+  }
+
+  /* write the (possibly transformed) matrix in the requested output format */
+  gk_csr_Write(mat, params->outfile, params->outf, params->writevals, 0);
+
+  gk_csr_Free(&mat);
+
+  return EXIT_SUCCESS;
+}
+
diff --git a/test/fis.c b/test/fis.c
new file mode 100644
index 0000000..084a4b6
--- /dev/null
+++ b/test/fis.c
@@ -0,0 +1,286 @@
+/*!
+\file  
+\brief A simple frequent itemset discovery program to test GKlib's routines
+
+\date 6/12/2008
+\author George
+\version \verbatim $Id: fis.c 11075 2011-11-11 22:31:52Z karypis $ \endverbatim
+*/
+
+#include <GKlib.h>
+
+/*************************************************************************/
+/*! Data structures for the code */
+/*************************************************************************/
+typedef struct {
+  ssize_t minlen, maxlen;     /* pattern length bounds (-minlen/-maxlen); maxlen defaults to -1 */
+  ssize_t minfreq, maxfreq;   /* pattern frequency bounds (-minfreq/-maxfreq); maxfreq defaults to -1 */
+  char *filename;             /* the input transaction file (the positional argument) */
+  int silent;                 /* non-zero: print_an_itemset() counts but does not print */
+  ssize_t nitemsets;          /* number of itemsets reported so far; reset in main() */
+  char *clabelfile;           /* optional column-labels file (-clabels); NULL if not given */
+  char **clabels;             /* one label string per matrix column, built in main() */
+} params_t;
+
+/*************************************************************************/
+/*! Constants: option codes returned by gk_getopt_long_only() and switched on in parse_cmdline() */
+/*************************************************************************/
+#define CMD_MINLEN      1
+#define CMD_MAXLEN      2
+#define CMD_MINFREQ     3
+#define CMD_MAXFREQ     4
+#define CMD_SILENT      5
+#define CMD_CLABELFILE  6
+#define CMD_HELP        10
+
+
+/*************************************************************************/
+/*! Local variables: long-option table; the second field is 1 for the options whose handlers read gk_optarg; an all-zero entry ends the table */
+/*************************************************************************/
+static struct gk_option long_options[] = {
+  {"minlen",        1,      0,      CMD_MINLEN},
+  {"maxlen",        1,      0,      CMD_MAXLEN},
+  {"minfreq",       1,      0,      CMD_MINFREQ},
+  {"maxfreq",       1,      0,      CMD_MAXFREQ},
+  {"silent",        0,      0,      CMD_SILENT},
+  {"clabels",       1,      0,      CMD_CLABELFILE},
+  {"help",          0,      0,      CMD_HELP},
+  {0,               0,      0,      0}
+};
+
+
+/*-------------------------------------------------------------------*/
+/* Mini help: both tables are printed line by line by parse_cmdline() and end with an empty string */
+/*-------------------------------------------------------------------*/
+static char helpstr[][100] = {
+" ",
+"Usage: fis [options] <mat-file>",
+" ",
+" Required parameters",
+"  mat-file",
+"     The name of the file storing the transactions. The file is in ",
+"     Cluto's .mat format.",
+" ",
+" Optional parameters",
+"  -minlen=int",
+"     Specifies the minimum length of the patterns. [default: 1]",
+" ",
+"  -maxlen=int",
+"     Specifies the maximum length of the patterns. [default: none]",
+" ",
+"  -minfreq=int",
+"     Specifies the minimum frequency of the patterns. [default: 10]",
+" ",
+"  -maxfreq=int",
+"     Specifies the maximum frequency of the patterns. [default: none]",
+" ",
+"  -silent",
+"     Does not print the discovered itemsets.",
+" ",
+"  -clabels=filename",
+"     Specifies the name of the file that stores the column labels.",
+" ",
+"  -help",
+"     Prints this message.",
+""
+};
+
+static char shorthelpstr[][100] = {
+" ",
+"   Usage: fis [options] <mat-file>",
+"          use 'fis -help' for a summary of the options.",
+""
+};
+ 
+
+
+/*************************************************************************/
+/*! Function prototypes */
+/*************************************************************************/
+void print_init_info(params_t *params, gk_csr_t *mat);
+void print_final_info(params_t *params);
+params_t *parse_cmdline(int argc, char *argv[]);
+void print_an_itemset(void *stateptr, int nitems, int *itemind, 
+                      int ntrans, int *tranind);
+
+
+/*************************************************************************/
+/*! the entry point */
+/**************************************************************************/
+int main(int argc, char *argv[])
+{
+  params_t *params;
+  gk_csr_t *mat;
+  FILE *labelfp;
+  char buf[8192];
+  ssize_t col;
+
+  params = parse_cmdline(argc, argv);
+  params->nitemsets = 0;
+
+  /* load the transactions (GK_CSR_FMT_CLUTO) and build the column index */
+  mat = gk_csr_Read(params->filename, GK_CSR_FMT_CLUTO, 1, 1);
+  gk_csr_CreateIndex(mat, GK_CSR_COL);
+
+  /* one label per column: taken from the labels file if given, else the column number */
+  params->clabels = (char **)gk_malloc(mat->ncols*sizeof(char *), "main: clabels");
+  if (params->clabelfile != NULL) {
+    labelfp = gk_fopen(params->clabelfile, "r", "main: fpin");
+    for (col=0; col<mat->ncols; col++) {
+      if (fgets(buf, sizeof(buf), labelfp) == NULL)
+        errexit("Failed on fgets.\n");
+      params->clabels[col] = gk_strdup(gk_strtprune(buf, " \n\t"));
+    }
+    gk_fclose(labelfp);
+  }
+  else {
+    for (col=0; col<mat->ncols; col++) {
+      sprintf(buf, "%zd", col);
+      params->clabels[col] = gk_strdup(buf);
+    }
+  }
+
+  print_init_info(params, mat);
+
+  /* print_an_itemset() is the callback and receives params as its state pointer */
+  gk_find_frequent_itemsets(mat->nrows, mat->rowptr, mat->rowind,
+      params->minfreq, params->maxfreq, params->minlen, params->maxlen,
+      &print_an_itemset, (void *)params);
+
+  printf("Total itemsets found: %zd\n", params->nitemsets);
+
+  print_final_info(params);
+}
+
+
+
+/*************************************************************************/
+/*! This function prints run parameters */
+/*************************************************************************/
+void print_init_info(params_t *params, gk_csr_t *mat)
+{
+  printf("*******************************************************************************\n");
+  printf(" fis\n\n");
+  printf("Matrix Information ---------------------------------------------------------\n");
+  printf(" input file=%s, [%d, %d, %zd]\n", 
+      params->filename, mat->nrows, mat->ncols, mat->rowptr[mat->nrows]);
+  /* the length/frequency bounds that main() passes to gk_find_frequent_itemsets() */
+  printf("\n");
+  printf("Options --------------------------------------------------------------------\n");
+  printf(" minlen=%zd, maxlen=%zd, minfreq=%zd, maxfreq=%zd\n",
+      params->minlen, params->maxlen, params->minfreq, params->maxfreq);
+
+  printf("\n");
+  printf("Finding patterns... -----------------------------------------------------\n");
+}
+
+
+/*************************************************************************/
+/*! This function prints final statistics */
+/*************************************************************************/
+void print_final_info(params_t *params)
+{
+  ssize_t maxmem = (ssize_t) gk_GetMaxMemoryUsed(), curmem = (ssize_t) gk_GetCurMemoryUsed();  /* peak first, then current */
+  printf("\nMemory Usage Information -----------------------------------------------------\n");
+  printf("   Maximum memory used:              %10zd bytes\n", maxmem);
+  printf("   Current memory used:              %10zd bytes\n", curmem);
+  printf("********************************************************************************\n");
+}
+
+
+/*************************************************************************/
+/*! This is the entry point of the command-line argument parser */
+/*************************************************************************/
+params_t *parse_cmdline(int argc, char *argv[])
+{
+  int i;
+  int c, option_index;
+  params_t *params;
+
+  params = (params_t *)gk_malloc(sizeof(params_t), "parse_cmdline: params");
+
+  /* initialize the params data structure */
+  params->minlen     = 1;
+  params->maxlen     = -1;
+  params->minfreq    = 10;
+  params->maxfreq    = -1;
+  params->silent     = 0;
+  params->filename   = NULL;
+  params->clabelfile = NULL;
+
+  /* Parse the command line arguments. -help exits with EXIT_SUCCESS; every
+     usage error (unknown option, wrong argument count) exits with EXIT_FAILURE */
+  while ((c = gk_getopt_long_only(argc, argv, "", long_options, &option_index)) != -1) {
+    switch (c) {
+      case CMD_MINLEN:
+        if (gk_optarg) params->minlen = atoi(gk_optarg);
+        break;
+      case CMD_MAXLEN:
+        if (gk_optarg) params->maxlen = atoi(gk_optarg);
+        break;
+      case CMD_MINFREQ:
+        if (gk_optarg) params->minfreq = atoi(gk_optarg);
+        break;
+      case CMD_MAXFREQ:
+        if (gk_optarg) params->maxfreq = atoi(gk_optarg);
+        break;
+
+      case CMD_SILENT:
+        params->silent = 1;
+        break;
+
+      case CMD_CLABELFILE:
+        if (gk_optarg) params->clabelfile = gk_strdup(gk_optarg);
+        break;
+
+      case CMD_HELP:
+        for (i=0; strlen(helpstr[i]) > 0; i++)
+          printf("%s\n", helpstr[i]);
+        exit(EXIT_SUCCESS);
+        break;
+      case '?':
+      default:
+        printf("Illegal command-line option(s)\nUse %s -help for a summary of the options.\n", argv[0]);
+        exit(EXIT_FAILURE);
+    }
+  }
+
+  if (argc-gk_optind != 1) {
+    printf("Unrecognized parameters.");
+    for (i=0; strlen(shorthelpstr[i]) > 0; i++)
+      printf("%s\n", shorthelpstr[i]);
+    exit(EXIT_FAILURE);
+  }
+
+  params->filename = gk_strdup(argv[gk_optind++]);
+
+  if (!gk_fexists(params->filename))
+    errexit("input file %s does not exist.\n", params->filename);
+
+  return params;
+}
+
+
+
+/*************************************************************************/
+/*! This is the callback function for the itemset discovery routine */
+/*************************************************************************/
+void print_an_itemset(void *stateptr, int nitems, int *itemids, int ntrans, 
+         int *transids)
+{
+  params_t *state = (params_t *)stateptr;
+  int k;
+
+  /* every reported itemset is counted, even when printing is suppressed */
+  state->nitemsets++;
+  if (state->silent)
+    return;
+  /* header line: running count, #items, #transactions, then the item labels */
+  printf("%4zd %4d %4d => ", state->nitemsets, nitems, ntrans);
+  for (k=0; k<nitems; k++)
+    printf(" %s", state->clabels[itemids[k]]);
+  printf("\n");
+  for (k=0; k<ntrans; k++)   /* one transaction id per line */
+    printf(" %d\n", transids[k]);
+  printf("\n");
+}
diff --git a/test/gkgraph.c b/test/gkgraph.c
new file mode 100644
index 0000000..9131464
--- /dev/null
+++ b/test/gkgraph.c
@@ -0,0 +1,845 @@
+/*!
+\file  gkgraph.c
+\brief A simple program to try out some graph routines
+
+\date 6/12/2008
+\author George
+\version \verbatim $Id: gkgraph.c 17700 2014-09-27 18:10:02Z karypis $ \endverbatim
+*/
+
+#include <GKlib.h>
+
+
+/*************************************************************************/
+/*! Data structures for the code */
+/*************************************************************************/
+/* Run-time settings of the gkgraph driver; allocated and filled in by
+   parse_cmdline() and handed to every routine in this file. */
+typedef struct {
+  int lnbits;     /* -lnbits: passed to gk_cacheCreate() as its 2nd argument
+                     (help text: address bits indexing the cacheline) */
+  int cnbits;     /* -cnbits: passed to gk_cacheCreate() as its 3rd argument
+                     (help text: address bits indexing the cache) */
+  int type;       /* -type: parsed and echoed by print_init_info(); not used
+                     for anything else in this file */
+  int niter;      /* -niter: number of label-propagation sweeps run by the
+                     reorder_*lpn*() routines */
+  float eps;      /* -eps: parsed and echoed only */
+  float lamda;    /* -lamda: parsed and echoed only */
+  int nosort;     /* -nosort: when set, sort_adjacencies() returns immediately */
+  int write;      /* -write: when set, test_spmv() writes each graph to disk */
+
+  char *infile;   /* name of the input graph file (first positional argument) */
+  char *outfile;  /* output file name; stored by parse_cmdline() but not read
+                     anywhere else in this file */
+} params_t;
+
+/*************************************************************************/
+/*! Constants */
+/*************************************************************************/
+/* Codes stored in the last column of long_options[]; parse_cmdline()
+   switches on them to tell the options apart. */
+#define CMD_NITER       1
+#define CMD_EPS         2
+#define CMD_LAMDA       3
+#define CMD_TYPE        4
+#define CMD_NOSORT      5
+#define CMD_WRITE       6
+#define CMD_LNBITS      7
+#define CMD_CNBITS      8
+#define CMD_HELP        10
+
+/* NOTE(review): the three constants below are not referenced anywhere in
+   this file; their intended use cannot be confirmed from here. */
+#define CLINE32 16
+#define CLINE64 8
+#define MAXRCLOCKSPAN   (1<<20)
+
+/*************************************************************************/
+/*! Local variables */
+/*************************************************************************/
+/* Option table handed to gk_getopt_long_only() by parse_cmdline().
+   Each row names an option and ends with the CMD_* code that the parser's
+   switch statement dispatches on; the all-zero row terminates the table.
+   NOTE(review): the columns presumably follow the getopt_long layout
+   {name, has_arg, flag, val} -- confirm against gk_getopt.h. */
+static struct gk_option long_options[] = {
+  {"lnbits",     1,      0,      CMD_LNBITS},
+  {"cnbits",     1,      0,      CMD_CNBITS},
+  {"type",       1,      0,      CMD_TYPE},
+  {"niter",      1,      0,      CMD_NITER},
+  {"lamda",      1,      0,      CMD_LAMDA},
+  {"eps",        1,      0,      CMD_EPS},
+  {"nosort",     0,      0,      CMD_NOSORT},
+  {"write",      0,      0,      CMD_WRITE},
+  {"help",       0,      0,      CMD_HELP},
+  {0,            0,      0,      0}
+};
+
+
+/*-------------------------------------------------------------------*/
+/* Mini help  */
+/*-------------------------------------------------------------------*/
+/* Full help text printed by -help, one row per output line; the empty
+   string in the last row terminates the loop that prints it.  The defaults
+   quoted here are the values parse_cmdline() assigns before it parses the
+   command line (niter=1, lnbits=6, cnbits=13, lamda=0.20, eps=1e-10). */
+static char helpstr[][100] = {
+" ",
+"Usage: gkgraph [options] <graph-file> [<out-file>]",
+" ",
+" Required parameters",
+"  graph-file",
+"     The name of the file storing the graph. The file is in ",
+"     Metis' graph format.",
+" ",
+" Optional parameters",
+"  -niter=int",
+"     Specifies the maximum number of iterations. [default: 1]",
+" ",
+"  -lnbits=int",
+"     Specifies the number of address bits indexing the cacheline. [default: 6]",
+" ",
+"  -cnbits=int",
+"     Specifies the number of address bits indexing the cache. [default: 13]",
+" ",
+"  -lamda=float",
+"     Specifies the follow-the-adjacent-links probability. [default: 0.20]",
+" ",
+"  -eps=float",
+"     Specifies the error tolerance. [default: 1e-10]",
+" ",
+"  -nosort",
+"     Does not sort the adjacency lists.",
+" ",
+"  -write",
+"     Output the reordered graphs.",
+" ",
+"  -help",
+"     Prints this message.",
+""
+};
+
+/* Short usage reminder that parse_cmdline() prints when the positional
+   arguments are wrong; the empty string in the last row ends the loop
+   that prints it. */
+static char shorthelpstr[][100] = {
+" ",
+"   Usage: gkgraph [options] <graph-file> [<out-file>]",
+"          use 'gkgraph -help' for a summary of the options.",
+""
+};
+ 
+
+
+/*************************************************************************/
+/*! Function prototypes */
+/*************************************************************************/
+/* experiment drivers (main() currently runs test_tc() only) */
+void test_spmv(params_t *params);
+void test_tc(params_t *params);
+/* graph preparation and cache-simulation measurements */
+void sort_adjacencies(params_t *params, gk_graph_t *graph);
+double compute_spmvstats(params_t *params, gk_graph_t *graph);
+double compute_tcstats(params_t *params, gk_graph_t *graph, int32_t *iperm);
+/* vertex orderings; each returns a newly allocated array of nvtxs entries
+   that the caller releases with gk_free() */
+int32_t *reorder_degrees(params_t *params, gk_graph_t *graph);
+int32_t *reorder_freqlpn(params_t *params, gk_graph_t *graph);
+int32_t *reorder_freqlpn_db(params_t *params, gk_graph_t *graph);
+int32_t *reorder_minlpn(params_t *params, gk_graph_t *graph);
+int32_t *reorder_minlpn_db(params_t *params, gk_graph_t *graph);
+/* reporting and command-line handling */
+void print_init_info(params_t *params, gk_graph_t *graph);
+void print_final_info(params_t *params);
+params_t *parse_cmdline(int argc, char *argv[]);
+
+
+/*************************************************************************/
+/*! the entry point */
+/**************************************************************************/
+int main(int argc, char *argv[])
+{
+  params_t *params;
+ 
+  /* get command-line options; parse_cmdline() exits on invalid input */
+  params = parse_cmdline(argc, argv);
+
+  /* run the triangle-counting cache experiments */
+  test_tc(params);
+
+  /* release what parse_cmdline() allocated: both file names are always
+     set there, and params itself has to go last because the first two
+     pointers live inside it */
+  gk_free((void **)&params->infile, &params->outfile, &params, LTERM);
+
+  return 0;
+}
+
+
+/*************************************************************************/
+/*! various spmv-related tests */
+/**************************************************************************/
+/* Helper of test_spmv() that evaluates one vertex ordering: it builds the
+   graph reordered by perm, sorts its adjacency lists, writes it to fname
+   when params->write is set, prints the simulated SPMV hit rate under the
+   given label, and finally releases both the reordered graph and perm
+   (ownership of perm passes to this function). */
+static void report_spmv_ordering(params_t *params, gk_graph_t *graph,
+         int32_t *perm, char *fname, char *label)
+{
+  gk_graph_t *pgraph;
+
+  pgraph = gk_graph_Reorder(graph, perm, NULL);
+  sort_adjacencies(params, pgraph);
+  if (params->write) gk_graph_Write(pgraph, fname, GK_GRAPH_FMT_IJV, 1);
+  /* the label is left-justified in 16 columns so the reports line up */
+  printf("%-16s SPMV HitRate: %.4lf\n", label, compute_spmvstats(params, pgraph));
+  gk_graph_Free(&pgraph);
+  gk_free((void **)&perm, LTERM);
+}
+
+
+/* Reads params->infile and reports the simulated SPMV cache hit rate of
+   the input ordering followed by the BFS, degree, and the four
+   label-propagation orderings, in that order. */
+void test_spmv(params_t *params)
+{
+  ssize_t v;
+  gk_graph_t *graph;
+  int32_t *perm;
+ 
+  /* read the data */
+  graph = gk_graph_Read(params->infile, GK_GRAPH_FMT_METIS, -1, -1, 0, 0, 0);
+
+  /* display some basic stats */
+  print_init_info(params, graph);
+
+  /* baseline: the ordering of the input file */
+  sort_adjacencies(params, graph);
+  if (params->write) gk_graph_Write(graph, "original.ijv", GK_GRAPH_FMT_IJV, 1);
+  printf("Input            SPMV HitRate: %.4lf\n", compute_spmvstats(params, graph));
+
+  /* BFS ordering started from a randomly selected vertex */
+  v = RandomInRange(graph->nvtxs);
+  gk_graph_ComputeBFSOrdering(graph, v, &perm, NULL);
+  report_spmv_ordering(params, graph, perm, "bfs.ijv", "BFS");
+
+  /* the remaining orderings are all computed from the input graph */
+  perm = reorder_degrees(params, graph);
+  report_spmv_ordering(params, graph, perm, "degrees.ijv", "Degrees");
+
+  perm = reorder_freqlpn(params, graph);
+  report_spmv_ordering(params, graph, perm, "freqlpn.ijv", "FreqLabelPropN");
+
+  perm = reorder_freqlpn_db(params, graph);
+  report_spmv_ordering(params, graph, perm, "freqlpn-db.ijv", "DBFreqLabelPropN");
+
+  perm = reorder_minlpn(params, graph);
+  report_spmv_ordering(params, graph, perm, "minlpn.ijv", "MinLabelPropN");
+
+  perm = reorder_minlpn_db(params, graph);
+  report_spmv_ordering(params, graph, perm, "minlpn-db.ijv", "DBMinLabelPropN");
+
+  gk_graph_Free(&graph);
+
+  print_final_info(params);
+
+  return;
+}
+
+
+/*************************************************************************/
+/*! various tc-related tests */
+/**************************************************************************/
+/* Reads params->infile, reorders it by vertex degree, and reports the
+   simulated cache hit rate of the triangle-counting kernel when the
+   vertices of that reordered graph are visited in degree, BFS, and
+   label-propagation order.  The visiting order is handed to
+   compute_tcstats() through iperm (iperm[step] = vertex to process). */
+void test_tc(params_t *params)
+{
+  ssize_t i, v;
+  gk_graph_t *graph, *pgraph;
+  int32_t *perm, *iperm;
+ 
+  /* read the data */
+  graph = gk_graph_Read(params->infile, GK_GRAPH_FMT_METIS, -1, -1, 0, 0, 0);
+
+  /* display some basic stats */
+  print_init_info(params, graph);
+
+  /* every experiment below runs on the degree-reordered copy */
+  perm = reorder_degrees(params, graph);
+  pgraph = gk_graph_Reorder(graph, perm, NULL);
+  gk_free((void **)&perm, LTERM);
+  sort_adjacencies(params, pgraph);
+  iperm = gk_i32incset(graph->nvtxs, 0, gk_i32malloc(graph->nvtxs, "iperm"));
+  printf("Degrees          TC HitRate: %.4lf\n", compute_tcstats(params, pgraph, iperm));
+
+
+  /* compute_tcstats() reverses part of every adjacency list in place, so
+     the lists are sorted again before each new ordering is derived */
+  sort_adjacencies(params, pgraph);
+  v = RandomInRange(pgraph->nvtxs);
+  gk_graph_ComputeBFSOrdering(pgraph, v, &perm, NULL);
+  for (i=0; i<graph->nvtxs; i++) iperm[perm[i]] = i;  /* invert perm */
+  gk_free((void **)&perm, LTERM);
+  printf("BFS              TC HitRate: %.4lf\n", compute_tcstats(params, pgraph, iperm));
+
+
+  sort_adjacencies(params, pgraph);
+  perm = reorder_freqlpn(params, pgraph);
+  for (i=0; i<graph->nvtxs; i++) iperm[perm[i]] = i;
+  gk_free((void **)&perm, LTERM);
+  printf("FreqLabelPropN   TC HitRate: %.4lf\n", compute_tcstats(params, pgraph, iperm));
+
+  sort_adjacencies(params, pgraph);
+  perm = reorder_freqlpn_db(params, pgraph);
+  for (i=0; i<graph->nvtxs; i++) iperm[perm[i]] = i;
+  gk_free((void **)&perm, LTERM);
+  printf("DBFreqLabelPropN TC HitRate: %.4lf\n", compute_tcstats(params, pgraph, iperm));
+
+  /* the degree-reordered copy is not needed past this point; without this
+     call it was never released */
+  gk_graph_Free(&pgraph);
+
+
+#ifdef XXX
+  perm = reorder_minlpn(params, graph);
+  pgraph = gk_graph_Reorder(graph, perm, NULL);
+  sort_adjacencies(params, pgraph);
+  if (params->write) gk_graph_Write(pgraph, "minlpn.ijv", GK_GRAPH_FMT_IJV, 1);
+  printf("MinLabelPropN    SPMV HitRate: %.4lf\n", compute_spmvstats(params, pgraph));
+  gk_graph_Free(&pgraph);
+  gk_free((void **)&perm, LTERM);
+
+  perm = reorder_minlpn_db(params, graph);
+  pgraph = gk_graph_Reorder(graph, perm, NULL);
+  sort_adjacencies(params, pgraph);
+  if (params->write) gk_graph_Write(pgraph, "minlpn-db.ijv", GK_GRAPH_FMT_IJV, 1);
+  printf("DBMinLabelPropN  SPMV HitRate: %.4lf\n", compute_spmvstats(params, pgraph));
+  gk_graph_Free(&pgraph);
+  gk_free((void **)&perm, LTERM);
+#endif
+
+  gk_free((void **)&iperm, LTERM);
+  gk_graph_Free(&graph);
+
+  print_final_info(params);
+
+  return;
+}
+
+
+/*************************************************************************/
+/*! This function sorts the adjacency lists of the vertices in increasing
+    order.
+*/
+/*************************************************************************/
+void sort_adjacencies(params_t *params, gk_graph_t *graph)
+{
+  uint64_t v, nv;
+  ssize_t *rowptr; 
+  int32_t *nbrs;
+
+  /* sorting is switched off by the -nosort command-line option */
+  if (!params->nosort) {
+    nv     = graph->nvtxs;
+    rowptr = graph->xadj;
+    nbrs   = graph->adjncy;
+
+    /* the neighbours of vertex v occupy nbrs[rowptr[v] .. rowptr[v+1]-1] */
+    v = 0;
+    while (v < nv) {
+      gk_i32sorti(rowptr[v+1]-rowptr[v], nbrs+rowptr[v]);
+      v++;
+    }
+  }
+
+  return;
+}
+
+
+/*************************************************************************/
+/*! This function analyzes the cache locality of an SPMV operation using
+    GKlib's cache simulator and returns the cache's hit rate.
+ */
+/*************************************************************************/
+double compute_spmvstats(params_t *params, gk_graph_t *graph)
+{
+  uint64_t e, nv;
+  ssize_t *rowptr; 
+  int32_t *nbrs, *x;
+  double rate;
+  gk_cache_t *sim;
+
+  /* the simulator is created before anything else is allocated; its
+     geometry comes from the -lnbits/-cnbits options */
+  sim = gk_cacheCreate(16, params->lnbits, params->cnbits);
+
+  nv     = graph->nvtxs;
+  rowptr = graph->xadj;
+  nbrs   = graph->adjncy;
+
+  /* only the addresses of x[] are used; its contents are never read */
+  x = gk_i32malloc(nv, "vec");
+
+  /* replay the accesses of the traversal: every adjacency entry and the
+     vector element that entry points at */
+  e = 0;
+  while (e < rowptr[nv]) {
+    gk_cacheLoad(sim, (size_t)(&nbrs[e]));
+    gk_cacheLoad(sim, (size_t)(&x[nbrs[e]]));
+    e++;
+  }
+
+  gk_free((void **)&x, LTERM);
+
+  rate = gk_cacheGetHitRate(sim);
+  gk_cacheDestroy(&sim);
+
+  return rate;
+}
+
+
+/*************************************************************************/
+/*! The hash-map-based triangle-counting routine that uses the JIK
+    triangle enumeration scheme.
+
+    This version implements the following:
+      - It does not store location information in L
+      - Reverses the order within U's adjacency lists to allow ++ traversal
+*/
+/*************************************************************************/
+/* Runs the triangle-counting kernel on graph while feeding every array
+   access to GKlib's cache simulator.
+
+   \param params supplies lnbits/cnbits for the simulated cache.
+   \param graph  the graph to process; the upper-triangular part of every
+                 adjacency list is reversed in place and NOT restored, so
+                 callers re-sort the lists afterwards (see test_tc()).
+   \param iperm  visiting order: step vjj processes vertex iperm[vjj].
+   \returns the hit rate reported by gk_cacheGetHitRate().
+
+   The number of triangles found is printed, not returned.
+   NOTE(review): the intersection loop has no explicit end-of-list test; it
+   appears to rely on every edge being stored in both directions -- confirm
+   that all inputs are symmetric. */
+double compute_tcstats(params_t *params, gk_graph_t *graph, int32_t *iperm)
+{
+  int32_t vi, vj, vjj, vk, nvtxs;
+  ssize_t ei, eiend, ej, ejend;
+  int64_t ntriangles;
+  ssize_t *xadj, *uxadj;
+  int32_t *adjncy;
+  int32_t l, hmsize, *hmap;
+  
+  gk_cache_t *cache = gk_cacheCreate(16, params->lnbits, params->cnbits); 
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+
+  /* determine the starting location of the upper triangular part */
+  uxadj = gk_zmalloc(nvtxs, "uxadj");
+  for (vi=0; vi<nvtxs; vi++) {
+    for (ei=xadj[vi], eiend=xadj[vi+1]; ei<eiend && adjncy[ei]<vi; ei++); 
+    uxadj[vi] = ei;
+    /* flip the order of Adj(vi)'s upper triangular adjacency list */
+    for (ej=xadj[vi+1]-1; ei<ej; ei++, ej--) {
+      vj = adjncy[ei];
+      adjncy[ei] = adjncy[ej];
+      adjncy[ej] = vj;
+    }
+  }
+
+  /* determine the size of the hash-map and convert it into a format
+     that is compatible with a bitwise AND operation */
+  for (hmsize=0, vi=0; vi<nvtxs; vi++) 
+    hmsize = gk_max(hmsize, (int32_t)(xadj[vi+1]-uxadj[vi]));
+  for (l=1; hmsize>(1<<l); l++);
+  hmsize = (1<<(l+4))-1;
+  /* zero-filled table: 0 marks an empty slot */
+  hmap = gk_i32smalloc(hmsize+1, 0, "hmap");
+
+  for (ntriangles=0, vjj=0; vjj<nvtxs; vjj++) {
+    vj = iperm[vjj];
+
+    gk_cacheLoad(cache, (size_t)(&xadj[vj]));
+    gk_cacheLoad(cache, (size_t)(&xadj[vj+1]));
+    gk_cacheLoad(cache, (size_t)(&uxadj[vj]));
+
+    /* nothing to do when either the upper or the lower part is empty */
+    if (xadj[vj+1]-uxadj[vj] == 0 || uxadj[vj] == xadj[vj])
+      continue;
+
+    /* hash Adj(vj) */
+    gk_cacheLoad(cache, (size_t)(&uxadj[vj]));
+    gk_cacheLoad(cache, (size_t)(&xadj[vj+1]));
+    for (ej=uxadj[vj], ejend=xadj[vj+1]; ej<ejend; ej++) {
+      gk_cacheLoad(cache, (size_t)(&adjncy[ej]));
+      vk = adjncy[ej];
+      /* linear probing; the gk_cacheLoad() call sits in the loop condition
+         so that every probe is recorded by the simulator */
+      for (l=(vk&hmsize); 
+           gk_cacheLoad(cache, (size_t)(&hmap[l])) && hmap[l]!=0; 
+           l=((l+1)&hmsize));
+      hmap[l] = vk;
+    }
+
+    /* find intersections */
+    gk_cacheLoad(cache, (size_t)(&xadj[vj]));
+    gk_cacheLoad(cache, (size_t)(&uxadj[vj]));
+    for (ej=xadj[vj], ejend=uxadj[vj]; ej<ejend; ej++) {
+      gk_cacheLoad(cache, (size_t)(&adjncy[ej]));
+      vi = adjncy[ej];
+      /* vi must be read before the load of uxadj[vi] is simulated;
+         otherwise the simulator is handed the address of a stale (or
+         one-past-the-end) entry instead of the one actually used below */
+      gk_cacheLoad(cache, (size_t)(&uxadj[vi]));
+      for (ei=uxadj[vi]; gk_cacheLoad(cache, (size_t)(&adjncy[ei])) && adjncy[ei]>vj; ei++) {
+        vk = adjncy[ei];
+        for (l=vk&hmsize; 
+             gk_cacheLoad(cache, (size_t)(&hmap[l])) && hmap[l]!=0 && hmap[l]!=vk; 
+             l=((l+1)&hmsize));
+        gk_cacheLoad(cache, (size_t)(&hmap[l]));
+        if (hmap[l] == vk) 
+          ntriangles++;
+      }
+    }
+
+    /* reset hash */
+    gk_cacheLoad(cache, (size_t)(&uxadj[vj]));
+    gk_cacheLoad(cache, (size_t)(&xadj[vj+1]));
+    for (ej=uxadj[vj], ejend=xadj[vj+1]; ej<ejend; ej++) {
+      gk_cacheLoad(cache, (size_t)(&adjncy[ej]));
+      vk = adjncy[ej];
+      for (l=(vk&hmsize); 
+           gk_cacheLoad(cache, (size_t)(&hmap[l])) && hmap[l]!=vk; 
+           l=((l+1)&hmsize));
+      hmap[l] = 0;
+    }
+  }
+  /* ntriangles is a signed 64-bit counter, hence PRId64 */
+  printf("& compatible hmsize: %"PRId32" #triangles: %"PRId64"\n", hmsize, ntriangles);
+
+  gk_free((void **)&uxadj, &hmap, LTERM);
+
+  double hitrate = gk_cacheGetHitRate(cache);
+  gk_cacheDestroy(&cache);
+
+  return hitrate;
+}
+
+
+/*************************************************************************/
+/*! This function computes an increasing degree ordering 
+*/
+/*************************************************************************/
+int32_t *reorder_degrees(params_t *params, gk_graph_t *graph)
+{
+  int vtx, nv, nbuckets;
+  ssize_t deg, *rowptr; 
+  int32_t *slot, *order;
+
+  nv     = graph->nvtxs;
+  rowptr = graph->xadj;
+
+  /* one bucket per degree value 0..maxdegree */
+  nbuckets = 0;
+  for (vtx=0; vtx<nv; vtx++) {
+    deg = rowptr[vtx+1]-rowptr[vtx];
+    if (deg > nbuckets)
+      nbuckets = deg;
+  }
+  nbuckets++;
+
+  /* histogram of the degrees, turned by MAKECSR into the first output
+     position of every bucket */
+  slot = gk_i32smalloc(nbuckets+1, 0, "counts");
+  for (vtx=0; vtx<nv; vtx++) {
+    deg = rowptr[vtx+1]-rowptr[vtx];
+    slot[deg]++;
+  }
+  MAKECSR(vtx, nbuckets, slot);
+
+  /* every vertex takes the next free position of its degree bucket */
+  order = gk_i32malloc(nv, "perm");
+  for (vtx=0; vtx<nv; vtx++) {
+    deg = rowptr[vtx+1]-rowptr[vtx];
+    order[vtx] = slot[deg]++;
+  }
+
+  gk_free((void **)&slot, LTERM);
+
+  return order;
+}
+
+
+/*************************************************************************/
+/*! This function re-orders the graph by:
+    - performing a fixed number of most-popular label propagation iterations
+    - locally renumbers the vertices with the same label
+*/
+/*************************************************************************/
+/* Computes a vertex ordering from params->niter sweeps of most-frequent
+   label propagation.
+
+   \param params supplies niter, the number of sweeps.
+   \param graph  the graph whose xadj/adjncy arrays are traversed.
+   \returns a newly allocated array of nvtxs entries in which perm[v] is the
+            rank of vertex v after sorting the vertices by their final
+            label; the caller releases it with gk_free(). */
+int32_t *reorder_freqlpn(params_t *params, gk_graph_t *graph)
+{
+  int32_t i, ii, k, nvtxs, maxlbl;
+  ssize_t j, *xadj; 
+  int32_t *adjncy, *labels, *freq, *perm;
+  gk_i32kv_t *cand;
+
+  nvtxs  = graph->nvtxs;
+  xadj   = graph->xadj;
+  adjncy = graph->adjncy;
+
+  labels = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "labels"));
+  freq   = gk_i32smalloc(nvtxs, 0, "freq");
+  perm   = gk_i32incset(nvtxs, 0, gk_i32malloc(nvtxs, "perm"));
+
+  for (k=0; k<params->niter; k++) {
+    /* perm doubles as the (shuffled) visiting order during the sweeps */
+    gk_i32randArrayPermuteFine(nvtxs, perm, 0);
+    for (ii=0; ii<nvtxs; ii++) {
+      i = perm[ii];
+
+      /* a vertex without neighbours keeps its label.  Without this test
+         the code below would read adjncy[] outside the vertex's own list
+         and leave a stray count behind in freq[] */
+      if (xadj[i] >= xadj[i+1])
+        continue;
+
+      /* start from the first neighbour's label, then scan the rest */
+      maxlbl = labels[adjncy[xadj[i]]];
+      freq[maxlbl] = 1;
+      for (j=xadj[i]+1; j<xadj[i+1]; j++) {
+        freq[labels[adjncy[j]]]++;
+        if (freq[maxlbl] < freq[labels[adjncy[j]]])
+          maxlbl = labels[adjncy[j]];
+        else if (freq[maxlbl] == freq[labels[adjncy[j]]]) {
+          /* ties are broken by a coin flip */
+          if (RandomInRange(2))
+            maxlbl = labels[adjncy[j]];
+        }
+      }
+      /* clear only the counters that were touched */
+      for (j=xadj[i]; j<xadj[i+1]; j++) 
+        freq[labels[adjncy[j]]] = 0;
+      labels[i] = maxlbl;
+    }
+  }
+
+  /* sort the vertices by label; a vertex's position in the sorted
+     sequence becomes its entry in perm */
+  cand = gk_i32kvmalloc(nvtxs, "cand");
+  for (i=0; i<nvtxs; i++) {
+    cand[i].key = labels[i];
+    cand[i].val = i;
+  }
+  gk_i32kvsorti(nvtxs, cand);
+
+  for (i=0; i<nvtxs; i++)
+    perm[cand[i].val] = i;
+
+  gk_free((void **)&labels, &freq, &cand, LTERM);
+
+  return perm;
+}
+
+
+/*************************************************************************/
+/*! This function re-orders the graph by:
+    - performing a fixed number of most-popular label propagation iterations
+    - restricts that propagation to take place within similar degree buckets
+      of vertices
+    - locally renumbers the vertices with the same label
+*/
+/*************************************************************************/
+int32_t *reorder_freqlpn_db(params_t *params, gk_graph_t *graph)
+{
+  int32_t v, pos, iter, nv, best, nbr, lbl;
+  ssize_t e, *rowptr; 
+  int32_t *nbrs, *label, *count, *order, *bucket;
+  gk_i32kv_t *pairs;
+
+  nv     = graph->nvtxs;
+  rowptr = graph->xadj;
+  nbrs   = graph->adjncy;
+
+  label  = gk_i32incset(nv, 0, gk_i32malloc(nv, "labels"));
+  count  = gk_i32smalloc(nv, 0, "freq");
+  order  = gk_i32incset(nv, 0, gk_i32malloc(nv, "perm"));
+  bucket = gk_i32malloc(nv, "dbucket");
+
+  /* vertices whose degrees agree after dropping the three low-order bits
+     share a bucket */
+  for (v=0; v<nv; v++)
+    bucket[v] = ((rowptr[v+1]-rowptr[v])>>3);
+
+  for (iter=0; iter<params->niter; iter++) {
+    /* order[] serves as the shuffled visiting order during the sweeps */
+    gk_i32randArrayPermuteFine(nv, order, 0);
+    for (pos=0; pos<nv; pos++) {
+      v    = order[pos];
+      best = label[v];
+
+      /* count the labels of the neighbours lying in v's own bucket and
+         track the most frequent one; ties are decided by a coin flip */
+      for (e=rowptr[v]; e<rowptr[v+1]; e++) {
+        nbr = nbrs[e];
+        if (bucket[v] == bucket[nbr]) {
+          lbl = label[nbr];
+          count[lbl]++;
+          if (count[best] < count[lbl])
+            best = lbl;
+          else if (count[best] == count[lbl] && RandomInRange(2))
+            best = lbl;
+        }
+      }
+
+      /* wipe the counters of all neighbours, counted or not */
+      for (e=rowptr[v]; e<rowptr[v+1]; e++) 
+        count[label[nbrs[e]]] = 0;
+      label[v] = best;
+    }
+  }
+
+  /* rank the vertices by final label; the rank is the returned entry */
+  pairs = gk_i32kvmalloc(nv, "cand");
+  for (v=0; v<nv; v++) {
+    pairs[v].val = v;
+    pairs[v].key = label[v];
+  }
+  gk_i32kvsorti(nv, pairs);
+
+  for (pos=0; pos<nv; pos++)
+    order[pairs[pos].val] = pos;
+
+  gk_free((void **)&label, &count, &bucket, &pairs, LTERM);
+
+  return order;
+}
+
+
+/*************************************************************************/
+/*! This function re-orders the graph by:
+    - performing a fixed number of min-label propagation iterations
+    - locally renumbers the vertices with the same label
+*/
+/*************************************************************************/
+int32_t *reorder_minlpn(params_t *params, gk_graph_t *graph)
+{
+  int32_t v, pos, iter, nv, lowest;
+  ssize_t e, *rowptr; 
+  int32_t *nbrs, *label, *order;
+  gk_i32kv_t *pairs;
+
+  nv     = graph->nvtxs;
+  rowptr = graph->xadj;
+  nbrs   = graph->adjncy;
+
+  label = gk_i32incset(nv, 0, gk_i32malloc(nv, "labels"));
+  order = gk_i32incset(nv, 0, gk_i32malloc(nv, "perm"));
+
+  /* each sweep gives every vertex the smallest label found among itself
+     and its neighbours; updates take effect within the same sweep */
+  for (iter=0; iter<params->niter; iter++) {
+    for (v=0; v<nv; v++) {
+      lowest = label[v];
+      for (e=rowptr[v]; e<rowptr[v+1]; e++) {
+        if (label[nbrs[e]] < lowest)
+          lowest = label[nbrs[e]];
+      }
+      label[v] = lowest;
+    }
+  }
+
+  /* rank the vertices by final label; the rank is the returned entry */
+  pairs = gk_i32kvmalloc(nv, "cand");
+  for (v=0; v<nv; v++) {
+    pairs[v].val = v;
+    pairs[v].key = label[v];
+  }
+  gk_i32kvsorti(nv, pairs);
+
+  for (pos=0; pos<nv; pos++)
+    order[pairs[pos].val] = pos;
+
+  gk_free((void **)&label, &pairs, LTERM);
+
+  return order;
+}
+
+
+/*************************************************************************/
+/*! This function re-orders the graph by:
+    - performing a fixed number of min-label propagation iterations 
+    - restricts that propagation to take place within similar degree buckets
+      of vertices
+    - locally renumbers the vertices with the same label
+*/
+/*************************************************************************/
+int32_t *reorder_minlpn_db(params_t *params, gk_graph_t *graph)
+{
+  int32_t v, pos, iter, nv, lowest, nbr;
+  ssize_t e, *rowptr; 
+  int32_t *nbrs, *label, *order, *bucket;
+  gk_i32kv_t *pairs;
+
+  nv     = graph->nvtxs;
+  rowptr = graph->xadj;
+  nbrs   = graph->adjncy;
+
+  label  = gk_i32incset(nv, 0, gk_i32malloc(nv, "labels"));
+  order  = gk_i32incset(nv, 0, gk_i32malloc(nv, "perm"));
+  bucket = gk_i32malloc(nv, "dbucket");
+
+  /* vertices whose degrees agree after dropping the three low-order bits
+     share a bucket */
+  for (v=0; v<nv; v++)
+    bucket[v] = ((rowptr[v+1]-rowptr[v])>>3);
+
+  /* each sweep gives every vertex the smallest label found among itself
+     and those neighbours that lie in the same degree bucket */
+  for (iter=0; iter<params->niter; iter++) {
+    for (v=0; v<nv; v++) {
+      lowest = label[v];
+      for (e=rowptr[v]; e<rowptr[v+1]; e++) {
+        nbr = nbrs[e];
+        if (bucket[v] == bucket[nbr] && label[nbr] < lowest)
+          lowest = label[nbr];
+      }
+      label[v] = lowest;
+    }
+  }
+
+  /* rank the vertices by final label; the rank is the returned entry */
+  pairs = gk_i32kvmalloc(nv, "cand");
+  for (v=0; v<nv; v++) {
+    pairs[v].val = v;
+    pairs[v].key = label[v];
+  }
+  gk_i32kvsorti(nv, pairs);
+
+  for (pos=0; pos<nv; pos++)
+    order[pairs[pos].val] = pos;
+
+  gk_free((void **)&label, &bucket, &pairs, LTERM);
+
+  return order;
+}
+
+
+/*************************************************************************/
+/*! This function prints run parameters */
+/*************************************************************************/
+void print_init_info(params_t *params, gk_graph_t *graph)
+{
+  /* the last xadj entry is printed next to the vertex count */
+  ssize_t xadj_end = graph->xadj[graph->nvtxs];
+
+  /* banner */
+  printf("*******************************************************************************\n");
+  printf(" gkgraph\n\n");
+
+  /* what was read */
+  printf("Graph Information ----------------------------------------------------------\n");
+  printf(" input file=%s, [%d, %zd]\n", 
+      params->infile, graph->nvtxs, xadj_end);
+  printf("\n");
+
+  /* the settings in effect for this run */
+  printf("Options --------------------------------------------------------------------\n");
+  printf(" lnbits=%d, cnbits=%d, type=%d, niter=%d, lamda=%f, eps=%e\n",
+      params->lnbits, params->cnbits, params->type, params->niter, 
+      params->lamda, params->eps);
+  printf("\n");
+
+  printf("Working... -----------------------------------------------------------------\n");
+}
+
+
+/*************************************************************************/
+/*! This function prints final statistics */
+/*************************************************************************/
+void print_final_info(params_t *params)
+{
+  ssize_t nbytes;
+
+  printf("\n");
+  printf("Memory Usage Information -----------------------------------------------------\n");
+
+  /* peak usage first, then what is still allocated right now */
+  nbytes = (ssize_t) gk_GetMaxMemoryUsed();
+  printf("   Maximum memory used:              %10zd bytes\n", nbytes);
+  nbytes = (ssize_t) gk_GetCurMemoryUsed();
+  printf("   Current memory used:              %10zd bytes\n", nbytes);
+
+  printf("********************************************************************************\n");
+}
+
+
+/*************************************************************************/
+/*! This is the entry point of the command-line argument parser */
+/*************************************************************************/
+/* Builds the run-time settings from the command line.
+
+   \param argc, argv the arguments received by main().
+   \returns a newly allocated params_t whose infile and outfile are always
+            set; outfile falls back to "gkgraph.out" when no second
+            positional argument is given.
+
+   The function does not return when -help is given, when an option is
+   illegal, when the number of positional arguments is not 1 or 2, or when
+   the input file does not exist. */
+params_t *parse_cmdline(int argc, char *argv[])
+{
+  int i, npos;
+  int c, option_index;
+  params_t *params;
+
+  params = (params_t *)gk_malloc(sizeof(params_t), "parse_cmdline: params");
+
+  /* initialize the params data structure */
+  params->lnbits    = 6;
+  params->cnbits    = 13;
+  params->type      = 1;
+  params->niter     = 1;
+  params->eps       = 1e-10;
+  params->lamda     = 0.20;
+  params->nosort    = 0;
+  params->write     = 0;
+  params->infile    = NULL;
+  params->outfile   = NULL;
+
+
+  /* Parse the command line arguments  */
+  while ((c = gk_getopt_long_only(argc, argv, "", long_options, &option_index)) != -1) {
+    switch (c) {
+      case CMD_LNBITS:
+        if (gk_optarg) params->lnbits = atoi(gk_optarg);
+        break;
+      case CMD_CNBITS:
+        if (gk_optarg) params->cnbits = atoi(gk_optarg);
+        break;
+      case CMD_TYPE:
+        if (gk_optarg) params->type = atoi(gk_optarg);
+        break;
+      case CMD_NITER:
+        if (gk_optarg) params->niter = atoi(gk_optarg);
+        break;
+      case CMD_EPS:
+        if (gk_optarg) params->eps = atof(gk_optarg);
+        break;
+      case CMD_LAMDA:
+        if (gk_optarg) params->lamda = atof(gk_optarg);
+        break;
+      case CMD_NOSORT:
+        params->nosort = 1;
+        break;
+      case CMD_WRITE:
+        params->write = 1;
+        break;
+
+      case CMD_HELP:
+        for (i=0; strlen(helpstr[i]) > 0; i++)
+          printf("%s\n", helpstr[i]);
+        exit(0);
+        break;
+      case '?':
+      default:
+        printf("Illegal command-line option(s)\nUse %s -help for a summary of the options.\n", argv[0]);
+        exit(0);
+    }
+  }
+
+  /* the usage line is "<graph-file> [<out-file>]": one positional argument
+     is required and a second one is optional.  The former test for exactly
+     one argument made the out-file branch below unreachable. */
+  npos = argc-gk_optind;
+  if (npos < 1 || npos > 2) {
+    printf("Unrecognized parameters.");
+    for (i=0; strlen(shorthelpstr[i]) > 0; i++)
+      printf("%s\n", shorthelpstr[i]);
+    exit(0);
+  }
+
+  params->infile  = gk_strdup(argv[gk_optind++]);
+
+  if (argc-gk_optind > 0) 
+    params->outfile = gk_strdup(argv[gk_optind++]);
+  else
+    params->outfile   = gk_strdup("gkgraph.out");
+
+  if (!gk_fexists(params->infile))
+    errexit("input file %s does not exist.\n", params->infile);
+
+  return params;
+}
+
diff --git a/test/gksort.c b/test/gksort.c
new file mode 100644
index 0000000..6543836
--- /dev/null
+++ b/test/gksort.c
@@ -0,0 +1,346 @@
+/*!
+\file  gksort.c
+\brief Testing module for the various sorting routines in GKlib
+
+\date   Started 4/4/2007
+\author George
+\version\verbatim $Id: gksort.c 11058 2011-11-10 00:02:50Z karypis $ \endverbatim
+*/
+
+#include <GKlib.h>
+
+#define N       10000
+
+/*************************************************************************/
+/*! Testing module for gk_?isort() routine */
+/*************************************************************************/
+void test_isort()
+{
+  gk_idx_t k;
+  int vals[N];
+
+  /* increasing order: random fill, sort, then compare each element with
+     its predecessor */
+  printf("Testing iisort...\n");
+  for (k=0; k<N; k++)
+    vals[k] = RandomInRange(123432);
+
+  gk_isorti(N, vals);
+
+  for (k=1; k<N; k++) {
+    if (vals[k-1] > vals[k])
+      printf("gk_isorti error at index %jd [%d %d]\n", (intmax_t)(k-1), vals[k-1], vals[k]);
+  }
+
+
+  /* decreasing order: same procedure with the comparison reversed */
+  printf("Testing disort...\n");
+  for (k=0; k<N; k++)
+    vals[k] = RandomInRange(123432);
+
+  gk_isortd(N, vals);
+
+  for (k=1; k<N; k++) {
+    if (vals[k-1] < vals[k])
+      printf("gk_isortd error at index %jd [%d %d]\n", (intmax_t)(k-1), vals[k-1], vals[k]);
+  }
+
+}
+
+
+/*************************************************************************/
+/*! Testing module for gk_?fsort() routine */
+/*************************************************************************/
+void test_fsort()
+{
+  gk_idx_t k;
+  float vals[N];
+
+  /* increasing order: random fill, sort, then compare each element with
+     its predecessor */
+  printf("Testing ifsort...\n");
+  for (k=0; k<N; k++)
+    vals[k] = RandomInRange(123432)/(1.0+RandomInRange(645323));
+
+  gk_fsorti(N, vals);
+
+  for (k=1; k<N; k++) {
+    if (vals[k-1] > vals[k])
+      printf("gk_fsorti error at index %jd [%f %f]\n", (intmax_t)(k-1), vals[k-1], vals[k]);
+  }
+
+
+  /* decreasing order: same procedure with the comparison reversed */
+  printf("Testing dfsort...\n");
+  for (k=0; k<N; k++)
+    vals[k] = RandomInRange(123432)/(1.0+RandomInRange(645323));
+
+  gk_fsortd(N, vals);
+
+  for (k=1; k<N; k++) {
+    if (vals[k-1] < vals[k])
+      printf("gk_fsortd error at index %jd [%f %f]\n", (intmax_t)(k-1), vals[k-1], vals[k]);
+  }
+
+}
+
+
+/*************************************************************************/
+/*! Testing module for gk_?idxsort() routine */
+/*************************************************************************/
+void test_idxsort()
+{
+  gk_idx_t k;
+  gk_idx_t vals[N];
+
+  /* increasing order: random fill, sort, then compare each element with
+     its predecessor */
+  printf("Testing idxsorti...\n");
+  for (k=0; k<N; k++)
+    vals[k] = RandomInRange(123432);
+
+  gk_idxsorti(N, vals);
+
+  for (k=1; k<N; k++) {
+    if (vals[k-1] > vals[k])
+      printf("gk_idxsorti error at index %zd [%zd %zd]\n", (ssize_t)(k-1), (ssize_t)vals[k-1], (ssize_t)vals[k]);
+  }
+
+
+  /* decreasing order: same procedure with the comparison reversed */
+  printf("Testing idxsortd...\n");
+  for (k=0; k<N; k++)
+    vals[k] = RandomInRange(123432);
+
+  gk_idxsortd(N, vals);
+
+  for (k=1; k<N; k++) {
+    if (vals[k-1] < vals[k])
+      printf("gk_idxsortd error at index %zd [%zd %zd]\n", (ssize_t)(k-1), (ssize_t)vals[k-1], (ssize_t)vals[k]);
+  }
+
+}
+
+
+
+/*************************************************************************/
+/*! Testing module for gk_?ikvsort() routine */
+/*************************************************************************/
+void test_ikvsort()
+{
+  gk_idx_t k;
+  gk_ikv_t recs[N];
+
+  /* increasing order: random keys tagged with their original position,
+     sort, then compare each key with its predecessor */
+  printf("Testing ikvsorti...\n");
+  for (k=0; k<N; k++) {
+    recs[k].val = k;
+    recs[k].key = RandomInRange(123432);
+  }
+
+  gk_ikvsorti(N, recs);
+
+  for (k=1; k<N; k++) {
+    if (recs[k-1].key > recs[k].key)
+      printf("gk_ikvsorti error at index %jd [%d %d] [%jd %jd]\n", (intmax_t)(k-1), recs[k-1].key, recs[k].key, (intmax_t)recs[k-1].val, (intmax_t)recs[k].val);
+  }
+
+
+  /* decreasing order: same procedure with the comparison reversed */
+  printf("Testing ikvsortd...\n");
+  for (k=0; k<N; k++) {
+    recs[k].val = k;
+    recs[k].key = RandomInRange(123432);
+  }
+
+  gk_ikvsortd(N, recs);
+
+  for (k=1; k<N; k++) {
+    if (recs[k-1].key < recs[k].key)
+      printf("gk_ikvsortd error at index %jd [%d %d] [%jd %jd]\n", (intmax_t)(k-1), recs[k-1].key, recs[k].key, (intmax_t)recs[k-1].val, (intmax_t)recs[k].val);
+  }
+
+}
+
+
+
+/*************************************************************************/
+/*! Testing module for gk_?fkvsort() routine */
+/*************************************************************************/
+void test_fkvsort()
+{
+  gk_idx_t k;
+  gk_fkv_t recs[N];
+
+  /* increasing order: random keys tagged with their original position,
+     sort, then compare each key with its predecessor */
+  printf("Testing fkvsorti...\n");
+  for (k=0; k<N; k++) {
+    recs[k].val = k;
+    recs[k].key = RandomInRange(123432)/(1.0+RandomInRange(645323));
+  }
+
+  gk_fkvsorti(N, recs);
+
+  for (k=1; k<N; k++) {
+    if (recs[k-1].key > recs[k].key)
+      printf("gk_fkvsorti error at index %jd [%f %f] [%jd %jd]\n", (intmax_t)(k-1), recs[k-1].key, recs[k].key, (intmax_t)recs[k-1].val, (intmax_t)recs[k].val);
+  }
+
+
+  /* decreasing order: same procedure with the comparison reversed */
+  printf("Testing fkvsortd...\n");
+  for (k=0; k<N; k++) {
+    recs[k].val = k;
+    recs[k].key = RandomInRange(123432)/(1.0+RandomInRange(645323));
+  }
+
+  gk_fkvsortd(N, recs);
+
+  for (k=1; k<N; k++) {
+    if (recs[k-1].key < recs[k].key)
+      printf("gk_fkvsortd error at index %jd [%f %f] [%jd %jd]\n", (intmax_t)(k-1), recs[k-1].key, recs[k].key, (intmax_t)recs[k-1].val, (intmax_t)recs[k].val);
+  }
+
+}
+
+
+/*************************************************************************/
+/*! Testing module for the gk_dkvsorti() and gk_dkvsortd() routines.
+
+    An array of N key-value pairs is filled with random real-valued keys
+    (the value of each pair records the position at which it was
+    generated), sorted, and then scanned; every adjacent pair whose keys
+    violate the requested order is reported on stdout.
+*/
+/*************************************************************************/
+void test_dkvsort()
+{
+  gk_idx_t pos;
+  gk_dkv_t kv[N];
+
+  /* increasing order: no key may be larger than the key that follows it */
+  printf("Testing dkvsorti...\n");
+  for (pos=0; pos<N; pos++) {
+    kv[pos].val = pos;
+    kv[pos].key = RandomInRange(123432)/(1.0+RandomInRange(645323));
+  }
+
+  gk_dkvsorti(N, kv);
+
+  for (pos=1; pos<N; pos++) {
+    if (kv[pos-1].key > kv[pos].key)
+      printf("gk_dkvsorti error at index %jd [%lf %lf] [%jd %jd]\n", (intmax_t)(pos-1), kv[pos-1].key, kv[pos].key, (intmax_t)kv[pos-1].val, (intmax_t)kv[pos].val);
+  }
+
+
+  /* decreasing order: no key may be smaller than the key that follows it */
+  printf("Testing dkvsortd...\n");
+  for (pos=0; pos<N; pos++) {
+    kv[pos].val = pos;
+    kv[pos].key = RandomInRange(123432)/(1.0+RandomInRange(645323));
+  }
+
+  gk_dkvsortd(N, kv);
+
+  for (pos=1; pos<N; pos++) {
+    if (kv[pos-1].key < kv[pos].key)
+      printf("gk_dkvsortd error at index %jd [%lf %lf] [%jd %jd]\n", (intmax_t)(pos-1), kv[pos-1].key, kv[pos].key, (intmax_t)kv[pos-1].val, (intmax_t)kv[pos].val);
+  }
+
+}
+
+
+/*************************************************************************/
+/*! Testing module for the gk_skvsorti() and gk_skvsortd() routines.
+
+    The keys are heap-allocated strings holding the decimal representation
+    of random integers, so the expected order is the strcmp() order and not
+    the numeric one. Every adjacent pair whose keys violate the requested
+    order is reported on stdout. The keys of each pass are released before
+    the array is reused, so the test does not leak memory.
+*/
+/*************************************************************************/
+void test_skvsort()
+{
+  gk_idx_t i;
+  gk_skv_t array[N];
+  char line[256];
+
+  /* test the increasing sort */
+  printf("Testing skvsorti...\n");
+  for (i=0; i<N; i++) {
+    sprintf(line, "%d", RandomInRange(123432));
+    array[i].key = gk_strdup(line);
+    array[i].val = i;
+  }
+
+  gk_skvsorti(N, array);
+
+  for (i=0; i<N-1; i++) {
+    if (strcmp(array[i].key, array[i+1].key) > 0)
+      printf("gk_skvsorti error at index %jd [%s %s] [%jd %jd]\n", (intmax_t)i, array[i].key, array[i+1].key, (intmax_t)array[i].val, (intmax_t)array[i+1].val);
+  }
+
+  /* release the keys of this pass; the next pass overwrites the pointers */
+  for (i=0; i<N; i++)
+    gk_free((void **)&array[i].key, LTERM);
+
+
+  /* test the decreasing sort */
+  printf("Testing skvsortd...\n");
+  for (i=0; i<N; i++) {
+    sprintf(line, "%d", RandomInRange(123432));
+    array[i].key = gk_strdup(line);
+    array[i].val = i;
+  }
+
+  gk_skvsortd(N, array);
+
+  for (i=0; i<N-1; i++) {
+    if (strcmp(array[i].key, array[i+1].key) < 0)
+      printf("gk_skvsortd error at index %jd [%s %s] [%jd %jd]\n", (intmax_t)i, array[i].key, array[i+1].key, (intmax_t)array[i].val, (intmax_t)array[i+1].val);
+  }
+
+  /* release the keys of the second pass as well */
+  for (i=0; i<N; i++)
+    gk_free((void **)&array[i].key, LTERM);
+
+}
+
+
+/*************************************************************************/
+/*! Testing module for the gk_idxkvsorti() and gk_idxkvsortd() routines.
+
+    An array of N key-value pairs is filled with random keys (the value of
+    each pair records the position at which it was generated), sorted, and
+    then scanned; every adjacent pair whose keys violate the requested
+    order is reported on stdout.
+*/
+/*************************************************************************/
+void test_idxkvsort()
+{
+  gk_idx_t pos;
+  gk_idxkv_t kv[N];
+
+  /* increasing order: no key may be larger than the key that follows it */
+  printf("Testing idxkvsorti...\n");
+  for (pos=0; pos<N; pos++) {
+    kv[pos].val = pos;
+    kv[pos].key = RandomInRange(123432);
+  }
+
+  gk_idxkvsorti(N, kv);
+
+  for (pos=1; pos<N; pos++) {
+    if (kv[pos-1].key > kv[pos].key)
+      printf("gk_idxkvsorti error at index %zd [%zd %zd] [%zd %zd]\n",
+          (ssize_t)(pos-1),
+          (ssize_t)kv[pos-1].key, (ssize_t)kv[pos].key,
+          (ssize_t)kv[pos-1].val, (ssize_t)kv[pos].val);
+  }
+
+
+  /* decreasing order: no key may be smaller than the key that follows it */
+  printf("Testing idxkvsortd...\n");
+  for (pos=0; pos<N; pos++) {
+    kv[pos].val = pos;
+    kv[pos].key = RandomInRange(123432);
+  }
+
+  gk_idxkvsortd(N, kv);
+
+  for (pos=1; pos<N; pos++) {
+    if (kv[pos-1].key < kv[pos].key)
+      printf("gk_idxkvsortd error at index %zd [%zd %zd] [%zd %zd]\n",
+          (ssize_t)(pos-1),
+          (ssize_t)kv[pos-1].key, (ssize_t)kv[pos].key,
+          (ssize_t)kv[pos-1].val, (ssize_t)kv[pos].val);
+  }
+
+}
+
+
+
+
+/*************************************************************************/
+/*! Entry point of the sort tester: runs every test module in sequence.
+    The key-value tests visible in this file report problems by printing to
+    stdout only; no exit status is derived from them. */
+/*************************************************************************/
+int main()
+{
+  /* sorts of plain arrays */
+  test_isort();
+  test_fsort();
+  test_idxsort();
+
+  /* sorts of key-value arrays, one test per key type */
+  test_ikvsort();
+  test_fkvsort();
+  test_dkvsort();
+  test_skvsort();
+  test_idxkvsort();
+}
+
diff --git a/test/gkuniq.c b/test/gkuniq.c
new file mode 100644
index 0000000..0b4bf68
--- /dev/null
+++ b/test/gkuniq.c
@@ -0,0 +1,268 @@
+/*!
+\file  
+\brief A program to test various implementations for unique.
+
+\date 10/8/2020
+\author George
+*/
+
+#include <GKlib.h>
+
+/*************************************************************************/
+/*! Command-line parameters of the program */
+/*************************************************************************/
+typedef struct {
+  ssize_t length;     /* length of the base array (1st positional argument) */
+  ssize_t dupfactor;  /* number of times the base array is replicated (2nd positional argument) */
+} params_t;
+
+/*************************************************************************/
+/*! Constants */
+/*************************************************************************/
+#define CMD_HELP        10
+
+
+/*************************************************************************/
+/*! Command-line options recognized by gk_getopt_long_only() in
+    parse_cmdline(). The last field of each entry is the code that
+    parse_cmdline() switches on; the all-zero entry ends the table. */
+/*************************************************************************/
+static struct gk_option long_options[] = {
+  {"help",          0,      0,      CMD_HELP},
+  {0,               0,      0,      0}
+};
+
+
+/*-------------------------------------------------------------------*/
+/* Mini help: printed one row per line by parse_cmdline(). The printing */
+/* loops stop at the first empty string, so the final "" row is the     */
+/* terminator and no other row may be empty (blank lines use " ").      */
+/* Each row is stored in a 100-byte slot.                               */
+/*-------------------------------------------------------------------*/
+static char helpstr[][100] = {
+" ",
+"Usage: gkuniq length dupfactor",
+" ",
+" Required parameters",
+"  length",
+"     The length of the base array.",
+" ",
+"  dupfactor",
+"     The number of times the initial array is replicated.",
+" ",
+" Optional parameters",
+"  -help",
+"     Prints this message.",
+""
+};
+
+
+
+/*************************************************************************/
+/*! Function prototypes */
+/*************************************************************************/
+params_t *parse_cmdline(int argc, char *argv[]);
+int unique_v1(int n, int *input, int *output);
+int unique_v2(int n, int *input, int *output);
+int unique_v3(int n, int *input, int *output, int *r_maxsize, int **r_hmap);
+void mem_flush(const void *p, unsigned int allocation_size);
+
+/*************************************************************************/
+/*! Flushes from the CPU caches every cache line that overlaps the buffer
+    [p, p+allocation_size), so that the code timed afterwards does not
+    benefit from data left in the cache by earlier work.
+
+    \param p is the start of the buffer; a NULL pointer is ignored.
+    \param allocation_size is the size of the buffer in bytes; 0 is ignored.
+
+    The routine is a no-op when NO_X86 is defined, as it relies on the x86
+    clflush/sfence instructions. The cache-line size is hard-coded to 64
+    bytes.
+*/
+/**************************************************************************/
+void mem_flush(const void *p, unsigned int allocation_size)
+{
+#ifndef NO_X86 
+  const uintptr_t cache_line = 64;
+  uintptr_t addr, end;
+
+  if (p == NULL || allocation_size == 0)
+    return;
+
+  /* Start at the beginning of the cache line that holds the first byte.
+     Stepping in cache-line strides from an unaligned p would stop before
+     reaching the line that holds the tail of the buffer. */
+  addr = (uintptr_t)p & ~(cache_line-1);
+  end  = (uintptr_t)p + allocation_size;
+
+  for (; addr < end; addr += cache_line) {
+    __asm__ volatile("clflush (%0)\n\t"
+                 :
+                 : "r"((const char *)addr)
+                 : "memory");
+  }
+
+  __asm__ volatile("sfence\n\t"
+                :
+                :
+                : "memory");
+#endif
+}
+
+/*************************************************************************/
+/*! The entry point: times the different unique-finding implementations.
+
+    The input consists of `dupfactor` concatenated copies of a base array of
+    `length` random values. Each implementation is timed after the input
+    and output arrays have been flushed from the cache (the flush itself is
+    inside the timed region). Note that unique_v1() sorts the input in
+    place, so the hash-based variants that follow operate on sorted data.
+    unique_v3() is run twice: the first run (V3c) has to allocate its hash
+    table, whereas the second run (V3w) reuses it.
+*/
+/**************************************************************************/
+int main(int argc, char *argv[])
+{
+  /* largest supported input: the hash-based variants size their table as
+     the smallest power of two >= 2*n and keep it in an int */
+  const ssize_t maxn = ((ssize_t)1)<<29;
+  int i, j, k;
+  params_t *params;
+  double tmr;
+  int n, nunique, *input, *output;
+  int maxsize=0, *hmap=NULL;
+
+  params = parse_cmdline(argc, argv);
+
+  /* reject sizes that are empty or whose product does not fit */
+  if (params->length < 1 || params->dupfactor < 1 || 
+      params->dupfactor > maxn/params->length)
+    errexit("length and dupfactor must be positive and length*dupfactor must not exceed %zd.\n", maxn);
+
+  /* create the input data */
+  n = params->length*params->dupfactor;
+  input  = gk_imalloc(n, "input");
+  output = gk_imalloc(n, "output");
+  for (i=0; i<params->length; i++) {
+    k = RandomInRange(n);
+    for (j=0; j<params->dupfactor; j++)
+      input[j*params->length+i] = k;
+  }
+
+  gk_clearwctimer(tmr);
+  gk_startwctimer(tmr);
+  mem_flush(input, n*sizeof(int));
+  mem_flush(output, n*sizeof(int));
+  nunique = unique_v1(n, input, output);
+  gk_stopwctimer(tmr);
+  printf(" V1: nunique: %d, timer: %.5lf\n", nunique, gk_getwctimer(tmr));
+
+  gk_clearwctimer(tmr);
+  gk_startwctimer(tmr);
+  mem_flush(input, n*sizeof(int));
+  mem_flush(output, n*sizeof(int));
+  nunique = unique_v2(n, input, output);
+  gk_stopwctimer(tmr);
+  printf(" V2: nunique: %d, timer: %.5lf\n", nunique, gk_getwctimer(tmr));
+
+  /* cold run: the hash table is allocated inside the call */
+  gk_clearwctimer(tmr);
+  gk_startwctimer(tmr);
+  mem_flush(input, n*sizeof(int));
+  mem_flush(output, n*sizeof(int));
+  nunique = unique_v3(n, input, output, &maxsize, &hmap);
+  gk_stopwctimer(tmr);
+  printf("V3c: nunique: %d, timer: %.5lf\n", nunique, gk_getwctimer(tmr));
+
+  /* warm run: the hash table of the previous call is reused */
+  gk_clearwctimer(tmr);
+  gk_startwctimer(tmr);
+  mem_flush(input, n*sizeof(int));
+  mem_flush(output, n*sizeof(int));
+  nunique = unique_v3(n, input, output, &maxsize, &hmap);
+  gk_stopwctimer(tmr);
+  printf("V3w: nunique: %d, timer: %.5lf\n", nunique, gk_getwctimer(tmr));
+
+  gk_free((void **)&input, &output, &hmap, &params, LTERM);
+
+  return 0;
+}
+
+
+
+/*************************************************************************/
+/*! This is the entry point of the command-line argument parser.
+
+    Recognizes the -help option and requires exactly two positional
+    arguments, which are converted with atoi() and stored as the length and
+    the dupfactor. The help request exits with status 0; an illegal option
+    or a wrong number of positional arguments prints a message and exits
+    with EXIT_FAILURE.
+
+    \returns a newly allocated params_t that the caller is responsible for
+             freeing.
+*/
+/*************************************************************************/
+params_t *parse_cmdline(int argc, char *argv[])
+{
+  int i;
+  int c, option_index;
+  params_t *params;
+
+  params = (params_t *)gk_malloc(sizeof(params_t), "parse_cmdline: params");
+
+  /* Parse the command line arguments  */
+  while ((c = gk_getopt_long_only(argc, argv, "", long_options, &option_index)) != -1) {
+    switch (c) {
+      case CMD_HELP:
+        for (i=0; strlen(helpstr[i]) > 0; i++)
+          printf("%s\n", helpstr[i]);
+        exit(EXIT_SUCCESS);
+      case '?':
+      default:
+        printf("Illegal command-line option(s)\nUse %s -help for a summary of the options.\n", argv[0]);
+        exit(EXIT_FAILURE);
+    }
+  }
+
+  /* exactly two positional arguments are expected */
+  if (argc-gk_optind != 2) {
+    printf("Unrecognized parameters.\n");
+    for (i=0; strlen(helpstr[i]) > 0; i++)
+      printf("%s\n", helpstr[i]);
+    exit(EXIT_FAILURE);
+  }
+
+  params->length    = atoi(argv[gk_optind++]);
+  params->dupfactor = atoi(argv[gk_optind++]);
+
+  return params;
+}
+
+
+/*************************************************************************/
+/*! gklib-sort based approach.
+
+    Sorts the input in increasing order and then copies the first element
+    of every run of equal values to the output.
+
+    \param n is the number of elements in the input.
+    \param input is the array of values; it is sorted in place.
+    \param output receives the unique values in increasing order; it must
+           have room for up to n elements.
+    \returns the number of unique values, which is 0 for an empty input.
+*/
+/*************************************************************************/
+int unique_v1(int n, int *input, int *output)
+{
+  int i, j;
+
+  /* nothing to do; also avoids touching input[0]/output[0] of empty arrays */
+  if (n <= 0)
+    return 0;
+
+  gk_isorti(n, input);
+
+  output[0] = input[0];
+  for (j=0, i=1; i<n; i++) {
+    if (output[j] != input[i]) 
+      output[++j] = input[i];
+  }
+  return j+1;
+}
+
+
+/*************************************************************************/
+/*! hash-table based approach.
+
+    Uses an open-addressing table with linear probing whose size is the
+    smallest power of two that is at least 2*n. The table is allocated and
+    freed within the call. Empty slots are marked with -1, so the input
+    values are expected to differ from -1.
+
+    \param n is the number of elements in the input.
+    \param input is the array of values; it is not modified.
+    \param output receives the unique values in order of first appearance;
+           it must have room for up to n elements.
+    \returns the number of unique values.
+*/
+/*************************************************************************/
+int unique_v2(int n, int *input, int *output)
+{
+  int pos, slot, val, count, tblsize, mask;
+  int *table;
+
+  /* smallest power of two that is >= 2*n */
+  tblsize = 1;
+  while (tblsize < 2*n)
+    tblsize *= 2;
+  mask = tblsize-1;
+
+  table = gk_ismalloc(tblsize, -1, "hmap");
+
+  count = 0;
+  for (pos=0; pos<n; pos++) {
+    val = input[pos];
+
+    /* probe until either the value or an empty slot is found */
+    slot = val&mask;
+    while (table[slot] != -1 && table[slot] != val)
+      slot = (slot+1)&mask;
+
+    if (table[slot] != -1)
+      continue;  /* the value has been seen before */
+
+    table[slot]     = val;
+    output[count++] = val;
+  }
+
+  gk_free((void **)&table, LTERM);
+  return count;
+}
+
+
+/*************************************************************************/
+/*! hash-table based approach, where the htable is most likely
+    pre-allocated.
+
+    Same algorithm as the allocate-per-call hash variant, but the table is
+    owned by the caller and kept across calls: it is (re)allocated only
+    when the required size exceeds *r_maxsize, and otherwise just its first
+    `size` entries are reset. Empty slots are marked with -1, so the input
+    values are expected to differ from -1.
+
+    \param n is the number of elements in the input.
+    \param input is the array of values; it is not modified.
+    \param output receives the unique values in order of first appearance;
+           it must have room for up to n elements.
+    \param r_maxsize holds the size of the table in *r_hmap and is updated
+           when the table grows; 0 on the first call.
+    \param r_hmap holds the table and is updated when the table grows;
+           NULL on the first call. The caller frees it.
+    \returns the number of unique values.
+*/
+/*************************************************************************/
+int unique_v3(int n, int *input, int *output, int *r_maxsize, int **r_hmap)
+{
+  int pos, slot, val, count, tblsize, mask;
+  int *table;
+
+  /* smallest power of two that is >= 2*n */
+  tblsize = 1;
+  while (tblsize < 2*n)
+    tblsize *= 2;
+  mask = tblsize-1;
+
+  if (tblsize <= *r_maxsize) {
+    /* the existing table is large enough; clear the part that is used */
+    table = *r_hmap;
+    gk_iset(tblsize, -1, table);
+  }
+  else {
+    /* replace the caller's table with a larger one */
+    gk_free((void **)r_hmap, LTERM);
+    table = gk_ismalloc(tblsize, -1, "hmap");
+    *r_hmap    = table;
+    *r_maxsize = tblsize;
+  }
+
+  count = 0;
+  for (pos=0; pos<n; pos++) {
+    val = input[pos];
+
+    /* probe until either the value or an empty slot is found */
+    slot = val&mask;
+    while (table[slot] != -1 && table[slot] != val)
+      slot = (slot+1)&mask;
+
+    if (table[slot] != -1)
+      continue;  /* the value has been seen before */
+
+    table[slot]     = val;
+    output[count++] = val;
+  }
+
+  return count;
+}
diff --git a/test/grKx.c b/test/grKx.c
new file mode 100644
index 0000000..a72b580
--- /dev/null
+++ b/test/grKx.c
@@ -0,0 +1,256 @@
+/*!
+\file  
+\brief A simple program to create multiple copies of an input matrix.
+
+\date 5/30/2013
+\author George
+\version \verbatim $Id: grKx.c 17699 2014-09-27 18:05:31Z karypis $ \endverbatim
+*/
+
+#include <GKlib.h>
+
+/*************************************************************************/
+/*! Command-line parameters of the program */
+/*************************************************************************/
+typedef struct {
+  int inf;          /* format of the input file */
+  int outf;         /* format of the output file */
+  int numbering;    /* input numbering (output when applicable) */
+  int readvals;     /* input values (output when applicable) */
+  int writevals;    /* output values */
+  int rshuf;        /* random shuffle of the rows */
+  int cshuf;        /* random shuffle of the columns */
+  int symmetric;    /* a symmetric shuffle */
+  int ncopies;      /* the copies of the graph to create */
+  char *infile;     /* input file */
+  char *outfile;    /* output file */
+} params_t;
+
+
+/*************************************************************************/
+/*! Constants */
+/*************************************************************************/
+#define CMD_NUMONE        1
+#define CMD_NOREADVALS    2
+#define CMD_NOWRITEVALS   3
+#define CMD_RSHUF         4
+#define CMD_CSHUF         5
+#define CMD_SYMMETRIC     6
+#define CMD_HELP          100
+
+
+/*************************************************************************/
+/*! Command-line options recognized by gk_getopt_long_only() in
+    parse_cmdline(). The last field of each entry is the CMD_* code that
+    parse_cmdline() switches on; the all-zero entry ends the table. */
+/*************************************************************************/
+static struct gk_option long_options[] = {
+  {"numone",      0,      0,      CMD_NUMONE},
+  {"noreadvals",  0,      0,      CMD_NOREADVALS},
+  {"nowritevals", 0,      0,      CMD_NOWRITEVALS},
+  {"rshuf",       0,      0,      CMD_RSHUF},
+  {"cshuf",       0,      0,      CMD_CSHUF},
+  {"symmetric",   0,      0,      CMD_SYMMETRIC},
+  {"help",        0,      0,      CMD_HELP},
+  {0,             0,      0,      0}
+};
+
+
+/*-------------------------------------------------------------------*/
+/* Mini help: printed one row per line when -help is given. The      */
+/* printing loop stops at the first empty string, so the final ""    */
+/* row is the terminator and blank lines are written as " ". Each    */
+/* row is stored in a 100-byte slot and must stay below 100 chars.   */
+/*-------------------------------------------------------------------*/
+static char helpstr[][100] = {
+" ",
+"Usage: grKx [options] <infile> <inf> <outfile> <outf> <ncopies>",
+" ",
+" Required parameters",
+"  infile, outfile",
+"     The name of the input/output CSR file.",
+" ",
+"  inf/outf",
+"     The format of the input/output file.",
+"     Supported values are:",
+"        1  GK_CSR_FMT_CLUTO",
+"        2  GK_CSR_FMT_CSR",
+"        3  GK_CSR_FMT_METIS",
+"        4  GK_CSR_FMT_BINROW",
+"        6  GK_CSR_FMT_IJV",
+"        7  GK_CSR_FMT_BIJV",
+" ",
+"  ncopies",
+"     The number of copies of the input matrix to create. The copies",
+"     are placed along the diagonal of the output matrix.",
+" ",
+" Optional parameters",
+"  -numone",
+"     Specifies that the numbering of the input file starts from 1. ",
+"     It only applies to CSR/IJV formats.",
+" ",
+"  -nowritevals",
+"     Specifies that no values will be output.",
+" ",
+"  -noreadvals",
+"     Specifies that the values will not be read when applicable.",
+" ",
+"  -rshuf",
+"     Specifies that the rows will be randomly shuffled prior to output.",
+" ",
+"  -cshuf",
+"     Specifies that the columns will be randomly shuffled prior to output.",
+" ",
+"  -symmetric",
+"     Specifies that the row+column shuffling will be symmetric.",
+" ",
+"  -help",
+"     Prints this message.",
+""
+};
+
+/* Short usage message printed when the number of positional arguments is */
+/* wrong. Like the full help, it is terminated by an empty string.        */
+static char shorthelpstr[][100] = {
+" ",
+"   Usage: grKx [options] <infile> <inf> <outfile> <outf> <ncopies>",
+"          use 'grKx -help' for a summary of the options.",
+""
+};
+ 
+
+/*************************************************************************/
+/*! This is the entry point of the command-line argument parser.
+
+    Processes the options and then requires exactly five positional
+    arguments: <infile> <inf> <outfile> <outf> <ncopies>. The help request
+    exits with status 0; an illegal option, a wrong number of positional
+    arguments, a non-positive number of copies, or a missing input file
+    terminate the program with an error.
+
+    \returns a newly allocated params_t; its infile/outfile strings are
+             also heap allocated.
+*/
+/*************************************************************************/
+params_t *parse_cmdline(int argc, char *argv[])
+{
+  int i;
+  int c, option_index;
+  params_t *params;
+
+  params = (params_t *)gk_malloc(sizeof(params_t), "parse_cmdline: params");
+
+  /* initialize the params data structure */
+  params->numbering = 0;
+  params->readvals  = 1;
+  params->writevals = 1;
+  params->rshuf     = 0;
+  params->cshuf     = 0;
+  params->symmetric = 0;
+  params->ncopies   = 1;
+
+  params->inf       = -1;
+  params->outf      = -1;
+  params->infile    = NULL;
+  params->outfile   = NULL;
+
+
+  /* Parse the command line arguments  */
+  while ((c = gk_getopt_long_only(argc, argv, "", long_options, &option_index)) != -1) {
+    switch (c) {
+      case CMD_NUMONE:
+        params->numbering = 1;
+        break;
+      case CMD_NOREADVALS:
+        params->readvals = 0;
+        break;
+      case CMD_NOWRITEVALS:
+        params->writevals = 0;
+        break;
+      case CMD_RSHUF:
+        params->rshuf = 1;
+        break;
+      case CMD_CSHUF:
+        params->cshuf = 1;
+        break;
+      case CMD_SYMMETRIC:
+        params->symmetric = 1;
+        break;
+
+      case CMD_HELP:
+        for (i=0; strlen(helpstr[i]) > 0; i++)
+          printf("%s\n", helpstr[i]);
+        exit(EXIT_SUCCESS);
+      case '?':
+      default:
+        printf("Illegal command-line option(s)\nUse %s -help for a summary of the options.\n", argv[0]);
+        exit(EXIT_FAILURE);
+    }
+  }
+
+  /* exactly five positional arguments are expected */
+  if (argc-gk_optind != 5) {
+    printf("Unrecognized parameters.\n");
+    for (i=0; strlen(shorthelpstr[i]) > 0; i++)
+      printf("%s\n", shorthelpstr[i]);
+    exit(EXIT_FAILURE);
+  }
+
+  params->infile  = gk_strdup(argv[gk_optind++]);
+  params->inf     = atoi(argv[gk_optind++]);
+  params->outfile = gk_strdup(argv[gk_optind++]);
+  params->outf    = atoi(argv[gk_optind++]);
+  params->ncopies = atoi(argv[gk_optind++]);
+
+  /* a non-positive count would lead to negative allocation sizes later on */
+  if (params->ncopies < 1)
+    errexit("The number of copies must be at least 1 (got %d).\n", params->ncopies);
+
+  if (!gk_fexists(params->infile))
+    errexit("input file %s does not exist.\n", params->infile);
+
+  return params;
+}
+
+
+/*************************************************************************/
+/*! The entry point: reads a matrix, replicates it, optionally shuffles
+    its rows and/or columns, and writes the result.
+
+    The output matrix has ncopies*nrows rows and ncopies*ncols columns. The
+    k-th copy occupies rows [k*nrows, (k+1)*nrows) and has its column
+    indices shifted by k*ncols, so the copies do not overlap. If values are
+    to be written but the matrix has none, all values are set to 1.0.
+*/
+/**************************************************************************/
+int main(int argc, char *argv[])
+{
+  ssize_t i, j, k, knnz, nrows, ncols, ncopies;
+  int what;
+  params_t *params;
+  gk_csr_t *mat, *kmat, *smat;
+ 
+  /* get command-line options */
+  params = parse_cmdline(argc, argv);
+
+  /* read the data */
+  mat = gk_csr_Read(params->infile, params->inf, params->readvals, params->numbering);
+
+  /* create the copies */
+  ncopies = params->ncopies;
+
+  nrows = mat->nrows;
+  ncols = mat->ncols;
+  knnz  = mat->rowptr[nrows]*ncopies;
+
+  kmat         = gk_csr_Create();
+  kmat->nrows  = nrows*ncopies;
+  kmat->ncols  = ncols*ncopies;
+
+  /* the matrix dimensions are stored in a narrower type than ssize_t;
+     make sure that the replicated dimensions survived the assignment */
+  if ((ssize_t)kmat->nrows != nrows*ncopies || (ssize_t)kmat->ncols != ncols*ncopies)
+    errexit("The replicated matrix (%zd x %zd) is too large to be represented.\n", 
+        nrows*ncopies, ncols*ncopies);
+
+  kmat->rowptr = gk_zmalloc(kmat->nrows+1, "rowptr");
+  kmat->rowind = gk_imalloc(knnz, "rowind");
+  if (mat->rowval)
+    kmat->rowval = gk_fmalloc(knnz, "rowval");
+
+  /* knnz is reused as the running count of the nonzeros written so far */
+  kmat->rowptr[0] = knnz = 0;
+  for (k=0; k<ncopies; k++) {
+    for (i=0; i<nrows; i++) {
+      for (j=mat->rowptr[i]; j<mat->rowptr[i+1]; j++, knnz++) {
+        kmat->rowind[knnz] = mat->rowind[j] + k*ncols;
+        if (mat->rowval)
+          kmat->rowval[knnz] = mat->rowval[j];
+      }
+      kmat->rowptr[k*nrows+i+1] = knnz;
+    }
+  }
+
+  gk_csr_Free(&mat);
+  mat = kmat;
+
+
+  if (params->rshuf || params->cshuf) {
+    if (params->rshuf && params->cshuf)
+      what = GK_CSR_ROWCOL;
+    else if (params->rshuf)
+      what = GK_CSR_ROW;
+    else
+      what = GK_CSR_COL;
+
+    smat = gk_csr_Shuffle(mat, what, params->symmetric);
+    gk_csr_Free(&mat);
+    mat = smat;
+  }
+
+  /* values were requested in the output but the matrix has none */
+  if (params->writevals && mat->rowval == NULL) 
+    mat->rowval = gk_fsmalloc(mat->rowptr[mat->nrows], 1.0, "mat->rowval");
+
+  gk_csr_Write(mat, params->outfile, params->outf, params->writevals, 0);
+
+  gk_csr_Free(&mat);
+
+  /* release the parameters: first the strings, then the structure itself */
+  gk_free((void **)&params->infile, &params->outfile, LTERM);
+  gk_free((void **)&params, LTERM);
+
+  return 0;
+}
+
diff --git a/test/m2mnbrs.c b/test/m2mnbrs.c
new file mode 100644
index 0000000..53f35ca
--- /dev/null
+++ b/test/m2mnbrs.c
@@ -0,0 +1,304 @@
+/*!
+\file  
+\brief It takes as input two CSR matrices and finds for each row of the 
+       first matrix the most similar rows in the second matrix.
+
+\date 9/27/2014
+\author George
+\version \verbatim $Id: m2mnbrs.c 17699 2014-09-27 18:05:31Z karypis $ \endverbatim
+*/
+
+#include <GKlib.h>
+
+/*************************************************************************/
+/*! Command-line parameters and timers of the program */
+/*************************************************************************/
+typedef struct {
+  int simtype;             /*!< The similarity type to use */
+  int nnbrs;               /*!< The maximum number of nearest neighbors to output */
+  float minsim;            /*!< The minimum similarity to use for keeping neighbors */
+
+  int verbosity;           /*!< The reporting verbosity level */
+
+  char *qfile;             /*!< The file storing the query documents */
+  char *cfile;             /*!< The file storing the collection documents */
+  char *outfile;           /*!< The file where the output will be stored */
+
+  /* timers: cleared and reported by main(); they are not initialized by
+     parse_cmdline() */
+  double timer_global;
+  double timer_1;
+  double timer_2;
+  double timer_3;
+  double timer_4;
+} params_t;
+
+
+/*************************************************************************/
+/*! Constants */
+/*************************************************************************/
+/* Versions */
+#define VER_MAJOR           0
+#define VER_MINOR           1
+#define VER_SUBMINOR        0
+
+/* Command-line option codes */
+#define CMD_SIMTYPE         10
+#define CMD_NNBRS           20
+#define CMD_MINSIM          22
+#define CMD_VERBOSITY       70
+#define CMD_HELP            100
+
+/* The text labels for the different simtypes. main() indexes this table
+   directly with params->simtype, so the order of the labels presumably
+   mirrors the numeric values of the GK_CSR_* similarity constants -- verify
+   against their definitions when adding or reordering entries. */
+static char simtypenames[][10] = {"", "dotp", "cos", "jac", ""};
+
+
+
+/*************************************************************************/
+/*! Command-line options recognized by gk_getopt_long_only() in
+    parse_cmdline(). The last field of each entry is the CMD_* code that
+    parse_cmdline() switches on; the all-zero entry ends the table. The
+    entries whose second field is 1 are the ones for which parse_cmdline()
+    reads a value from gk_optarg. */
+/*************************************************************************/
+static struct gk_option long_options[] = {
+  {"simtype",           1,      0,      CMD_SIMTYPE},
+  {"nnbrs",             1,      0,      CMD_NNBRS},
+  {"minsim",            1,      0,      CMD_MINSIM},
+  {"verbosity",         1,      0,      CMD_VERBOSITY},
+
+  {"help",              0,      0,      CMD_HELP},
+  {0,                   0,      0,      0}
+};
+
+/* Maps the strings accepted by -simtype to the corresponding GK_CSR_*
+   constants; looked up with gk_GetStringID() in parse_cmdline(), which
+   treats a result of -1 as an invalid name. The NULL entry ends the table. */
+static gk_StringMap_t simtype_options[] = {
+  {"cos",                GK_CSR_COS},
+  {"jac",                GK_CSR_JAC},
+  {NULL,                 0}
+};
+
+
+/*-------------------------------------------------------------------
+ * Mini help: printed one row per line when -help is given. The
+ * printing loop stops at the first empty string, so the final "" row
+ * is the terminator and blank lines are written as " ". Each row is
+ * stored in a 100-byte slot and must stay below 100 characters.
+ *-------------------------------------------------------------------*/
+static char helpstr[][100] =
+{
+" ",
+"Usage: m2mnbrs [options] qfile cfile [outfile]",
+" ",
+" Required parameters",
+"  qfile",
+"     The file storing the query documents.",
+" ",
+"  cfile",
+"     The file storing the collection documents.",
+" ",
+" Optional parameters",
+"  outfile",
+"     The file where the neighbors will be written. If it is not",
+"     specified, no output is written.",
+" ",
+" Options",
+"  -simtype=string",
+"     Specifies the type of similarity to use. Possible values are:",
+"       cos   - Cosine similarity",
+"       jac   - Jaccard similarity [default]", 
+" ",
+"  -nnbrs=int",
+"     Specifies the maximum number of nearest neighbors.",
+"     A value of -1 indicates that all neighbors will be considered.",
+"     Default value is 100.",
+" ",
+"  -minsim=float",
+"     The minimum allowed similarity between neighbors. ",
+"     Default value is .25.",
+" ",
+"  -verbosity=int",
+"     Specifies the level of debugging information to be displayed.",
+"     Default value is 0.",
+" ",
+"  -help",
+"     Prints this message.",
+""
+};
+
+
+
+/*************************************************************************/
+/*! Function prototypes */
+/*************************************************************************/
+params_t *parse_cmdline(int argc, char *argv[]);
+void FindNeighbors(params_t *params, gk_csr_t *qmat, gk_csr_t *cmat);
+
+
+/*************************************************************************/
+/*! This is the entry point of the command-line argument parser.
+
+    Processes the options and then reads the positional arguments: the
+    query file and the collection file are required, whereas the output
+    file is optional. The help request exits with EXIT_SUCCESS; an illegal
+    option, fewer than two positional arguments, or a missing input file
+    terminate the program with an error.
+
+    \returns a newly allocated params_t whose timer fields are left
+             uninitialized.
+*/
+/*************************************************************************/
+params_t *parse_cmdline(int argc, char *argv[])
+{
+  int i;
+  int c, option_index;
+  params_t *params;
+
+  params = (params_t *)gk_malloc(sizeof(params_t), "parse_cmdline: params");
+
+  /* initialize the params data structure */
+  params->simtype   = GK_CSR_JAC;
+  params->nnbrs     = 100;
+  params->minsim    = .25;
+  params->verbosity = -1;
+  params->qfile     = NULL;
+  params->cfile     = NULL;
+  params->outfile   = NULL;
+
+
+  /* Parse the command line arguments  */
+  while ((c = gk_getopt_long_only(argc, argv, "", long_options, &option_index)) != -1) {
+    switch (c) {
+      case CMD_SIMTYPE:
+        if (gk_optarg) {
+          if ((params->simtype = gk_GetStringID(simtype_options, gk_optarg)) == -1)
+            errexit("Invalid simtype of %s.\n", gk_optarg);
+        }
+        break;
+
+      case CMD_NNBRS:
+        if (gk_optarg) params->nnbrs = atoi(gk_optarg);
+        break;
+
+      case CMD_MINSIM:
+        if (gk_optarg) params->minsim = atof(gk_optarg);
+        break;
+
+      case CMD_VERBOSITY:
+        if (gk_optarg) params->verbosity = atoi(gk_optarg);
+        break;
+
+      case CMD_HELP:
+        for (i=0; strlen(helpstr[i]) > 0; i++)
+          printf("%s\n", helpstr[i]);
+        exit(EXIT_SUCCESS);
+        break;
+
+      case '?':
+      default:
+        printf("Illegal command-line option(s)\nUse %s -help for a summary of the options.\n", argv[0]);
+        exit(EXIT_FAILURE);
+    }
+  }
+
+  /* Get the input/output file info. Both qfile and cfile are read
+     unconditionally below, so at least two positional arguments must be
+     present; otherwise argv[argc] (i.e., NULL) would be duplicated. */
+  if (argc-gk_optind < 2) {
+    printf("Missing input/output file info.\n  Use %s -help for a summary of the options.\n", argv[0]);
+    exit(EXIT_FAILURE);
+  }
+
+  params->qfile   = gk_strdup(argv[gk_optind++]);
+  params->cfile   = gk_strdup(argv[gk_optind++]);
+  params->outfile = (gk_optind < argc ? gk_strdup(argv[gk_optind++]) : NULL);
+
+  if (!gk_fexists(params->qfile))
+    errexit("input file %s does not exist.\n", params->qfile);
+  if (!gk_fexists(params->cfile))
+    errexit("input file %s does not exist.\n", params->cfile);
+
+  return params;
+}
+
+
+/*************************************************************************/
+/*! This is the entry point of the program.
+
+    Reads the query and collection matrices (both in GK_CSR_FMT_CSR
+    format), prints a summary of the run, finds the neighbors of every
+    query row via FindNeighbors(), and reports the timers.
+*/
+/**************************************************************************/
+int main(int argc, char *argv[])
+{
+  params_t *opts;
+  gk_csr_t *queries, *collection;
+
+  opts = parse_cmdline(argc, argv);
+
+  queries    = gk_csr_Read(opts->qfile, GK_CSR_FMT_CSR, 1, 0);
+  collection = gk_csr_Read(opts->cfile, GK_CSR_FMT_CSR, 1, 0);
+
+  /* summary of the run */
+  printf("********************************************************************************\n");
+  printf("sd (%d.%d.%d) Copyright 2014, GK.\n", VER_MAJOR, VER_MINOR, VER_SUBMINOR);
+  printf("  simtype=%s, nnbrs=%d, minsim=%.2f\n",
+      simtypenames[opts->simtype], opts->nnbrs, opts->minsim);
+  printf("  qfile=%s, nrows=%d, ncols=%d, nnz=%zd\n",
+      opts->qfile, queries->nrows, queries->ncols, 
+      queries->rowptr[queries->nrows]);
+  printf("  cfile=%s, nrows=%d, ncols=%d, nnz=%zd\n",
+      opts->cfile, collection->nrows, collection->ncols, 
+      collection->rowptr[collection->nrows]);
+
+  /* reset all the timers before the work starts */
+  gk_clearwctimer(opts->timer_global);
+  gk_clearwctimer(opts->timer_1);
+  gk_clearwctimer(opts->timer_2);
+  gk_clearwctimer(opts->timer_3);
+  gk_clearwctimer(opts->timer_4);
+
+  /* the global timer covers the entire neighbor search */
+  gk_startwctimer(opts->timer_global);
+  FindNeighbors(opts, queries, collection);
+  gk_stopwctimer(opts->timer_global);
+
+  printf("    wclock: %.2lfs\n", gk_getwctimer(opts->timer_global));
+  printf("    timer1: %.2lfs\n", gk_getwctimer(opts->timer_1));
+  printf("    timer2: %.2lfs\n", gk_getwctimer(opts->timer_2));
+  printf("    timer3: %.2lfs\n", gk_getwctimer(opts->timer_3));
+  printf("    timer4: %.2lfs\n", gk_getwctimer(opts->timer_4));
+  printf("********************************************************************************\n");
+
+  gk_csr_Free(&queries);
+  gk_csr_Free(&collection);
+
+  exit(EXIT_SUCCESS);
+}
+
+
+/*************************************************************************/
+/*! Computes the neighbors of each query document against the collection
+    of documents and optionally writes them to the output file.
+
+    \param params provides the similarity type, the maximum number of
+           neighbors, the minimum similarity, the verbosity, the name of
+           the output file (NULL for no output), and timer_1, which
+           accumulates the time spent in the search loop.
+    \param qmat is the matrix whose rows are the queries; its rows are
+           normalized in place when the cosine similarity is used.
+    \param cmat is the matrix whose rows are the collection; besides the
+           normalization, a column index and the squared row norms are
+           computed on it.
+
+    Each output line has the form "<query row> <neighbor> <similarity>",
+    taken from the val and key fields of the returned hits.
+*/
+/**************************************************************************/
+void FindNeighbors(params_t *params, gk_csr_t *qmat, gk_csr_t *cmat)
+{
+  int qrow, h, nfound;
+  int32_t *marker;
+  gk_fkv_t *hits, *cand;
+  FILE *out = NULL;
+
+  GKASSERT(qmat->ncols <= cmat->ncols);
+
+  /* the cosine similarity operates on unit-length rows */
+  if (params->simtype == GK_CSR_COS) {
+    gk_csr_Normalize(qmat, GK_CSR_ROW, 2);
+    gk_csr_Normalize(cmat, GK_CSR_ROW, 2);
+  }
+
+  /* build the inverted (column-based) index of the collection */
+  gk_csr_CreateIndex(cmat, GK_CSR_COL);
+
+  /* pre-compute the squared norms of the collection's rows */
+  gk_csr_ComputeSquaredNorms(cmat, GK_CSR_ROW);
+
+  /* the output file is optional */
+  if (params->outfile)
+    out = gk_fopen(params->outfile, "w", "FindNeighbors: fpout");
+
+  /* working arrays, each with one entry per collection row */
+  hits   = gk_fkvmalloc(cmat->nrows, "FindNeighbors: hits");
+  marker = gk_i32smalloc(cmat->nrows, -1, "FindNeighbors: marker");
+  cand   = gk_fkvmalloc(cmat->nrows, "FindNeighbors: cand");
+
+
+  /* process the queries one row at a time */
+  gk_startwctimer(params->timer_1);
+  for (qrow=0; qrow<qmat->nrows; qrow++) {
+    if (params->verbosity > 0)
+      printf("Working on query %7d\n", qrow);
+
+    /* the query is described by the nonzeros of the current row */
+    nfound = gk_csr_GetSimilarRows(cmat, 
+                 qmat->rowptr[qrow+1]-qmat->rowptr[qrow], 
+                 &qmat->rowind[qmat->rowptr[qrow]], 
+                 &qmat->rowval[qmat->rowptr[qrow]], 
+                 params->simtype, params->nnbrs, params->minsim, 
+                 hits, marker, cand);
+
+    if (out == NULL)
+      continue;
+
+    /* one line per neighbor that was found */
+    for (h=0; h<nfound; h++) 
+      fprintf(out, "%8d %8zd %.3f\n", qrow, hits[h].val, hits[h].key);
+  }
+  gk_stopwctimer(params->timer_1);
+
+
+  /* cleanup and exit */
+  if (out) gk_fclose(out);
+
+  gk_free((void **)&hits, &marker, &cand, LTERM);
+}
+
diff --git a/test/rw.c b/test/rw.c
new file mode 100644
index 0000000..1a3295e
--- /dev/null
+++ b/test/rw.c
@@ -0,0 +1,306 @@
+/*!
+\file  
+\brief A simple (personalized) random walk program to test GKlib's routines
+
+\date 6/12/2008
+\author George
+\version \verbatim $Id$ \endverbatim
+*/
+
+#include <GKlib.h>
+
+/*************************************************************************/
+/*! Data structures for the code */
+/*************************************************************************/
+typedef struct {  /* run-time options of the rw program, filled in by parse_cmdline() */
+  int niter;      /* maximum number of iterations (-niter, default 100) */
+  int ntvs;       /* number of random test vectors to compute (-ntvs); -1 = not requested */
+  int ppr;        /* 1-based source vertex of the personalized PR (-ppr); -1 = not requested */
+  float eps;      /* error tolerance (-eps, default 1e-10) */
+  float lamda;    /* follow-the-adjacent-links probability (-lamda, default 0.80) */
+  char *infile;   /* graph file name (first positional argument) */
+  char *outfile;  /* output file name (second positional argument) */
+} params_t;
+
+/*************************************************************************/
+/*! Option codes: the last field of each long_options[] entry, matched by
+    the switch in parse_cmdline() */
+#define CMD_NITER       1
+#define CMD_EPS         2
+#define CMD_LAMDA       3
+#define CMD_PPR         4
+#define CMD_NTVS        5
+#define CMD_HELP        10
+
+
+/*************************************************************************/
+/*! Long options handed to gk_getopt_long_only(); the row of zeros ends the table.
+    NOTE(review): fields look like getopt_long's {name, has_arg, flag, val} - confirm. */
+static struct gk_option long_options[] = {
+  {"niter",      1,      0,      CMD_NITER},
+  {"lamda",      1,      0,      CMD_LAMDA},
+  {"eps",        1,      0,      CMD_EPS},
+  {"ppr",        1,      0,      CMD_PPR},
+  {"ntvs",       1,      0,      CMD_NTVS},
+  {"help",       0,      0,      CMD_HELP},
+  {0,            0,      0,      0}
+};
+
+
+/* Full help printed for -help; rows are printed in order until the "" sentinel row. */
+static char helpstr[][100] = {
+" ",
+"Usage: rw [options] <graph-file> <out-file>",
+" ",
+" Required parameters",
+"  graph-file",
+"     The name of the file storing the graph in Metis' graph format.",
+"  out-file",
+"     The name of the file in which the computed scores are written.",
+" ",
+" Optional parameters",
+"  -niter=int",
+"     Specifies the maximum number of iterations. [default: 100]",
+" ",
+"  -lamda=float",
+"     Specifies the follow-the-adjacent-links probability. [default: 0.80]",
+" ",
+"  -eps=float",
+"     Specifies the error tolerance. [default: 1e-10]",
+" ",
+"  -ppr=int",
+"     Specifies the source of the personalized PR. [default: -1]",
+" ",
+"  -ntvs=int",
+"     Specifies the number of test-vectors to compute. [default: -1]",
+" ",
+"  -help",
+"     Prints this message.",
+""
+};
+
+/* Short usage reminder printed when the positional arguments are wrong; same "" sentinel. */
+static char shorthelpstr[][100] = {
+" ",
+"   Usage: rw [options] <graph-file> <out-file>",
+"          use 'rw -help' for a summary of the options.",
+""
+};
+ 
+
+
+/*************************************************************************/
+/*! Function prototypes */
+/*************************************************************************/
+void print_init_info(params_t *params, gk_csr_t *mat);
+void print_final_info(params_t *params);
+params_t *parse_cmdline(int argc, char *argv[]);
+
+
+/*************************************************************************/
+/*! Entry point: reads the graph, runs gk_rw_PageRank() in the mode selected by
+    -ntvs, -ppr or neither, and writes the resulting scores to the output file */
+int main(int argc, char *argv[])
+{
+  ssize_t i, j, niter;
+  params_t *params;
+  gk_csr_t *mat;
+  FILE *fpout;
+
+  /* get command-line options */
+  params = parse_cmdline(argc, argv);
+
+  /* read the data */
+  mat = gk_csr_Read(params->infile, GK_CSR_FMT_METIS, 1, 1);
+
+  /* display some basic stats */
+  print_init_info(params, mat);
+
+  if (params->ntvs != -1) {
+    /* compute the pr for different randomly generated restart-distribution vectors */
+    float **prs;
+
+    prs = gk_fAllocMatrix(params->ntvs, mat->nrows, 0.0, "main: prs");
+
+    /* generate the random restart vectors */
+    for (j=0; j<params->ntvs; j++) {
+      for (i=0; i<mat->nrows; i++)
+        prs[j][i] = RandomInRange(931);
+      gk_fscale(mat->nrows, 1.0/gk_fsum(mat->nrows, prs[j], 1), prs[j], 1);
+
+      niter = gk_rw_PageRank(mat, params->lamda, params->eps, params->niter, prs[j]);
+      printf("tvs#: %zd; niters: %zd\n", j, niter);
+    }
+
+    /* output the computed pr scores */
+    fpout = gk_fopen(params->outfile, "w", "main: outfile");
+    for (i=0; i<mat->nrows; i++) {
+      for (j=0; j<params->ntvs; j++)
+        fprintf(fpout, "%.4e ", prs[j][i]);
+      fprintf(fpout, "\n");
+    }
+    gk_fclose(fpout);
+
+    gk_fFreeMatrix(&prs, params->ntvs, mat->nrows);
+  }
+  else if (params->ppr != -1) {
+    /* personalized pr from the 1-based vertex -ppr; ids outside [1, nrows] would index outside pr[] */
+    float *pr;
+
+    if (params->ppr < 1 || params->ppr > mat->nrows)
+      errexit("The -ppr vertex %d is not in the range [1, %d].\n", params->ppr, mat->nrows);
+    pr = gk_fsmalloc(mat->nrows, 0.0, "main: pr");
+    pr[params->ppr-1] = 1.0;
+
+    niter = gk_rw_PageRank(mat, params->lamda, params->eps, params->niter, pr);
+    printf("ppr: %d; niters: %zd\n", params->ppr, niter);
+
+    /* output the computed pr scores */
+    fpout = gk_fopen(params->outfile, "w", "main: outfile");
+    for (i=0; i<mat->nrows; i++)
+      fprintf(fpout, "%.4e\n", pr[i]);
+    gk_fclose(fpout);
+
+    gk_free((void **)&pr, LTERM);
+  }
+  else {
+    /* compute the standard pr */
+    int jmax;
+    float diff, maxdiff;
+    float *pr;
+
+    pr = gk_fsmalloc(mat->nrows, 1.0/mat->nrows, "main: pr");
+
+    niter = gk_rw_PageRank(mat, params->lamda, params->eps, params->niter, pr);
+    printf("pr; niters: %zd\n", niter);
+
+    /* per vertex: score, row length, largest score gap to an adjacent vertex, that vertex (1-based) */
+    fpout = gk_fopen(params->outfile, "w", "main: outfile");
+    for (i=0; i<mat->nrows; i++) {
+      for (jmax=i, maxdiff=0.0, j=mat->rowptr[i]; j<mat->rowptr[i+1]; j++) {
+        if ((diff = fabs(pr[i]-pr[mat->rowind[j]])) > maxdiff) {
+          maxdiff = diff;
+          jmax = mat->rowind[j];
+        }
+      }
+      fprintf(fpout, "%.4e %10zd %.4e %10d\n", pr[i],
+          mat->rowptr[i+1]-mat->rowptr[i], maxdiff, jmax+1);
+    }
+    gk_fclose(fpout);
+
+    gk_free((void **)&pr, LTERM);
+  }
+
+  gk_csr_Free(&mat);
+
+  /* display some final stats */
+  print_final_info(params);
+}
+
+
+
+/*************************************************************************/
+/*! Prints the start-up banner: program name, input file with the matrix
+    dimensions and rowptr[nrows], and the option values in effect */
+void print_init_info(params_t *params, gk_csr_t *mat)
+{
+  printf("*******************************************************************************\n");
+  printf(" rw\n\n");
+  printf("Matrix Information ---------------------------------------------------------\n");
+  printf(" input file=%s, [%d, %d, %zd]\n", 
+      params->infile, mat->nrows, mat->ncols, mat->rowptr[mat->nrows]);
+
+  printf("\n");
+  printf("Options --------------------------------------------------------------------\n");
+  printf(" niter=%d, ntvs=%d, ppr=%d, lamda=%f, eps=%e\n",
+      params->niter, params->ntvs, params->ppr, params->lamda, params->eps);
+
+  printf("\n");
+  printf("Performing random walks... ----------------------------------------------\n");
+}
+
+
+/*************************************************************************/
+/*! Prints the maximum and current memory usage reported by GKlib; params is unused */
+/*************************************************************************/
+void print_final_info(params_t *params)
+{
+  fputs("\n", stdout);
+  fputs("Memory Usage Information -----------------------------------------------------\n", stdout);
+  printf("   Maximum memory used:              %10zd bytes\n", (ssize_t) gk_GetMaxMemoryUsed());
+  printf("   Current memory used:              %10zd bytes\n", (ssize_t) gk_GetCurMemoryUsed());
+  fputs("********************************************************************************\n", stdout);
+}
+
+
+/*************************************************************************/
+/*! Builds the params_t for this run from the command line; exits on bad usage */
+/*************************************************************************/
+params_t *parse_cmdline(int argc, char *argv[])
+{
+  int row, opt, optidx;
+  params_t *params;
+
+  /* start from the defaults; -1 leaves the -ppr and -ntvs modes switched off */
+  params = (params_t *)gk_malloc(sizeof(params_t), "parse_cmdline: params");
+  params->infile    = NULL;
+  params->outfile   = NULL;
+  params->niter     = 100;
+  params->eps       = 1e-10;
+  params->lamda     = 0.80;
+  params->ppr       = -1;
+  params->ntvs      = -1;
+
+  /* consume the options; an option that arrives without a value keeps its default */
+  for (;;) {
+    opt = gk_getopt_long_only(argc, argv, "", long_options, &optidx);
+    if (opt == -1)
+      break;
+    switch (opt) {
+      case CMD_NITER:
+        if (gk_optarg) params->niter = atoi(gk_optarg);
+        break;
+      case CMD_NTVS:
+        if (gk_optarg) params->ntvs = atoi(gk_optarg);
+        break;
+      case CMD_PPR:
+        if (gk_optarg) params->ppr = atoi(gk_optarg);
+        break;
+      case CMD_EPS:
+        if (gk_optarg) params->eps = atof(gk_optarg);
+        break;
+      case CMD_LAMDA:
+        if (gk_optarg) params->lamda = atof(gk_optarg);
+        break;
+
+      case CMD_HELP:
+        for (row=0; helpstr[row][0] != '\0'; row++)
+          printf("%s\n", helpstr[row]);
+        exit(0);
+      case '?':
+      default:
+        printf("Illegal command-line option(s)\nUse %s -help for a summary of the options.\n", argv[0]);
+        exit(0);
+    }
+  }
+
+  /* exactly two positional arguments must remain: <graph-file> <out-file> */
+  if (argc-gk_optind != 2) {
+    printf("Unrecognized parameters.");
+    for (row=0; shorthelpstr[row][0] != '\0'; row++)
+      printf("%s\n", shorthelpstr[row]);
+    exit(0);
+  }
+
+  params->infile  = gk_strdup(argv[gk_optind++]);
+  params->outfile = gk_strdup(argv[gk_optind++]);
+
+  if (!gk_fexists(params->infile))
+    errexit("input file %s does not exist.\n", params->infile);
+
+  if (params->ppr != -1 && params->ntvs != -1)
+    errexit("Only one of the -ppr and -ntvs options can be specified.\n");
+
+  return params;
+}
+
diff --git a/test/splatt2svd.c b/test/splatt2svd.c
new file mode 100644
index 0000000..111d31c
--- /dev/null
+++ b/test/splatt2svd.c
@@ -0,0 +1,98 @@
+/*!
+\file 
+\brief A simple program to convert a tensor in coordinate format into an unfolded 
+       matrix
+
+\author George
+*/
+
+#include <GKlib.h>
+
+/* Unfolds the "k i j value" tensor read from <infile> into a (nK*nI) x nJ matrix, printed by column */
+int main(int argc, char *argv[])
+{
+  size_t nnz, i, j, nI, nJ, nK, nrows, ncols;
+  int32_t *I, *J, *K, *rowind, *colind;
+  ssize_t *rowptr, *colptr;
+  float *V, *rowval, *colval;
+
+  if (argc != 2)
+    errexit("Usage %s <infile> [%d]\n", argv[0], argc);
+
+  if (!gk_fexists(argv[1]))
+    errexit("File %s does not exist.\n", argv[1]);
+
+  gk_getfilestats(argv[1], &nnz, NULL, NULL, NULL);
+  I = gk_i32malloc(nnz, "I");
+  J = gk_i32malloc(nnz, "J");
+  K = gk_i32malloc(nnz, "K");
+  V = gk_fmalloc(nnz, "V");
+  fprintf(stderr, "Input nnz: %zu\n", nnz);
+  /* read the nonzeros; the file holds 1-based indices, stored here 0-based */
+  FILE *fpin = gk_fopen(argv[1], "r", "infile");
+  for (i=0; i<nnz; i++) {
+    if (4 != fscanf(fpin, "%d %d %d %f", K+i, I+i, J+i, V+i))
+      errexit("Failed to read 4 values in line %zu\n", i);
+    if (K[i] < 1 || I[i] < 1 || J[i] < 1)  /* would index before the arrays built below */
+      errexit("Non-positive index in line %zu\n", i);
+    K[i]--; I[i]--; J[i]--;
+  }
+  gk_fclose(fpin);
+
+  nI = gk_i32max(nnz, I, 1)+1;
+  nJ = gk_i32max(nnz, J, 1)+1;
+  nK = gk_i32max(nnz, K, 1)+1;
+  fprintf(stderr, "nI: %zu, nJ: %zu, nK: %zu\n", nI, nJ, nK);
+  /* row-wise arrays of the unfolded matrix: entry (k, i, j) lands in row k*nI+i, column j */
+  nrows = nK*nI;
+  ncols = nJ;
+  rowptr = gk_zsmalloc(nrows+1, 0, "rowptr");
+  for (i=0; i<nnz; i++)
+    rowptr[K[i]*nI+I[i]]++;
+  MAKECSR(i, nrows, rowptr);
+
+  rowind = gk_i32malloc(nnz, "rowind");
+  rowval = gk_fmalloc(nnz, "rowval");
+  for (i=0; i<nnz; i++) {
+    rowind[rowptr[K[i]*nI+I[i]]] = J[i];
+    rowval[rowptr[K[i]*nI+I[i]]] = V[i];
+    rowptr[K[i]*nI+I[i]]++;
+  }
+  SHIFTCSR(i, nrows, rowptr);
+
+  gk_free((void **)&I, &J, &K, &V, LTERM);
+  /* regroup the entries by column; rows are visited in increasing order */
+  colptr = gk_zsmalloc(ncols+1, 0, "colptr");
+  colind = gk_i32malloc(nnz, "colind");
+  colval = gk_fmalloc(nnz, "colval");
+  for (i=0; i<nrows; i++) {
+    for (j=rowptr[i]; j<rowptr[i+1]; j++)
+      colptr[rowind[j]]++;
+  }
+  MAKECSR(i, ncols, colptr);
+  for (i=0; i<nrows; i++) {
+    for (j=rowptr[i]; j<rowptr[i+1]; j++) {
+      colind[colptr[rowind[j]]] = i;
+      colval[colptr[rowind[j]]] = rowval[j];
+      colptr[rowind[j]]++;
+    }
+  }
+  SHIFTCSR(i, ncols, colptr);
+
+  /* sanity check */
+  for (i=0; i<ncols; i++) {
+    for (j=colptr[i]+1; j<colptr[i+1]; j++) {
+      if (colind[j-1] == colind[j])
+        fprintf(stderr, "Duplicate row indices: %d %d %d\n", (int)i, colind[j], colind[j-1]);
+    }
+  }
+  /* output: "nrows ncols nnz", then for every column its length and its (row, value) pairs */
+  printf("%zu %zu %zu\n", nrows, ncols, nnz);
+  for (i=0; i<ncols; i++) {
+    printf("%zd\n", colptr[i+1]-colptr[i]);
+    for (j=colptr[i]; j<colptr[i+1]; j++)
+      printf("%d %.3f\n", colind[j], colval[j]);
+  }
+  gk_free((void **)&rowptr, &rowind, &rowval, &colptr, &colind, &colval, LTERM);
+}
+
diff --git a/test/strings.c b/test/strings.c
new file mode 100644
index 0000000..b241d3f
--- /dev/null
+++ b/test/strings.c
@@ -0,0 +1,82 @@
+/*!
+\file strings.c
+\brief Testing module for the string functions in GKlib
+
+\date Started 3/5/2007
+\author George
+\version\verbatim $Id: strings.c 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
+*/
+
+#include <GKlib.h>
+
+
+/*************************************************************************/
+/*! Runs gk_strstr_replace() on a fixed list of cases, printing the return
+    code and the resulting string of each one */
+void test_strstr_replace()
+{
+  /* the cases, listed in the order in which they are run */
+  static struct { char *str, *pattern, *replacement, *options; } cases[] = {
+    /* single-character pattern, no options */
+    { "This is a simple string",
+      "s", "S", "" },
+
+    /* same pattern with the "g" option */
+    { "This is a simple string",
+      "s", "S", "g" },
+
+    /* mixed-case input, "g" option */
+    { "This is a simple SS & ss string",
+      "s", "T", "g" },
+
+    /* mixed-case input, "ig" options */
+    { "This is a simple SS & ss string",
+      "s", "T", "ig" },
+
+    /* word pattern with one capture group, replaced by $1 */
+    { "This is a simple SS & ss string",
+      "\\b\\w(\\w+)\\w\\b", "$1", "ig" },
+
+    /* whole-word pattern replaced by a fixed string */
+    { "This is a simple SS & ss string",
+      "\\b\\w+\\b", "word", "ig" },
+
+    /* URL with three capture groups recombined as $1$2-P$3 */
+    { "http://www.cs.umn.edu/This-is-something-T12323?pp=20&page=4",
+      "(http://www\\.cs\\.umn\\.edu/)(.*)-T(\\d+)", "$1$2-P$3", "g" },
+
+    /* digit runs prefixed through $1 */
+    { "http://www.cs.umn.edu/This-is-something-T12323?pp=20&page=4",
+      "(\\d+)", "number:$1", "ig" },
+
+    /* URL prefix wrapped in brackets through $1 */
+    { "http://www.cs.umn.edu/This-is-something-T12323?pp=20&page=4",
+      "(http://www\\.cs\\.umn\\.edu/)", "[$1]", "g" }
+  };
+  size_t icase, ncases = sizeof(cases)/sizeof(cases[0]);
+  char *new_str;
+  int rc;
+
+  for (icase=0; icase<ncases; icase++) {
+    rc = gk_strstr_replace(cases[icase].str, cases[icase].pattern,
+             cases[icase].replacement, cases[icase].options, &new_str);
+    printf("%d, %s.\n", rc, new_str);
+    gk_free((void **)&new_str, LTERM);
+  }
+}
+
+
+
+int main()
+{
+  test_strstr_replace();  /* the only test that is currently enabled */
+
+/* Disabled RandomInRange() sampling loop, kept for reference:
+  {
+  int i;
+  for (i=0; i<1000; i++)
+    printf("%d\n", RandomInRange(3));
+  }
+*/
+}
+
diff --git a/timers.c b/timers.c
new file mode 100644
index 0000000..bb8f296
--- /dev/null
+++ b/timers.c
@@ -0,0 +1,52 @@
+/*!
+\file  timers.c
+\brief Various timing functions 
+
+\date   Started 4/12/2007
+\author George
+\version\verbatim $Id: timers.c 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
+*/
+
+
+#include <GKlib.h>
+
+
+
+
+/*************************************************************************
+* Returns the wall-clock time in seconds: gettimeofday() under __GNUC__, time() otherwise
+**************************************************************************/
+double gk_WClockSeconds(void)
+{
+#ifdef __GNUC__
+  struct timeval now;
+
+  gettimeofday(&now, NULL);
+  return 1.0e-6*now.tv_usec + (double)now.tv_sec;
+#else
+  time_t now = time(NULL);
+  return (double)now;
+#endif
+}
+
+
+/*************************************************************************
+* Returns CPU seconds: clock() ticks on WIN32/MinGW, otherwise the user plus
+* system time that getrusage() reports for this process
+**************************************************************************/
+double gk_CPUSeconds(void)
+{
+/* this test used to be "#ifdef __OPENMP__"; the XXXX name presumably keeps the branch off */
+#if defined(__OPENMPXXXX__)
+  return omp_get_wtime();
+#elif defined(WIN32) || defined(__MINGW32__)
+  return((double) clock()/CLOCKS_PER_SEC);
+#else
+  struct rusage usage;
+
+  getrusage(RUSAGE_SELF, &usage);
+  return ((usage.ru_utime.tv_sec + usage.ru_stime.tv_sec)
+          + 1.0e-6*(usage.ru_utime.tv_usec + usage.ru_stime.tv_usec));
+#endif
+}
+
diff --git a/tokenizer.c b/tokenizer.c
new file mode 100644
index 0000000..5efd262
--- /dev/null
+++ b/tokenizer.c
@@ -0,0 +1,77 @@
+/*!
+\file  tokenizer.c
+\brief String tokenization routines
+
+This file contains various routines for splitting an input string into
+tokens and returning them in form of a list. The goal is to mimic perl's 
+split function.
+
+\date   Started 11/23/04
+\author George
+\version\verbatim $Id: tokenizer.c 10711 2011-08-31 22:23:04Z karypis $ \endverbatim
+*/
+
+
+#include <GKlib.h>
+
+
+/************************************************************************
+* Splits str into the tokens that are separated by runs of characters
+* taken from delim.
+*
+* tokens->strbuf receives a gk_strdup() copy of str in which every
+* delimiter character is overwritten with '\0'. tokens->list[] points at
+* the start of each token inside that copy and tokens->ntoks is the number
+* of tokens found. The caller's str is left untouched.
+*
+*************************************************************************/
+void gk_strtokenize(char *str, char *delim, gk_Tokens_t *tokens)
+{
+  int pos, count, slen, run;
+  char *buf;
+
+  slen = strlen(str);
+  buf  = tokens->strbuf = gk_strdup(str);
+
+  /* First pass: count the tokens so that the pointer list can be sized */
+  for (count=0, pos=0; pos<slen; count++) {
+    /* skip the run of delimiters in front of the next token, if any */
+    pos += strspn(buf+pos, delim);
+    if (pos == slen)
+      break;
+
+    /* skip the token itself */
+    pos += strcspn(buf+pos, delim);
+  }
+
+
+  tokens->ntoks = count;
+  tokens->list  = (char **)gk_malloc(count*sizeof(char *), "strtokenize: tokens->list");
+
+
+  /* Second pass: terminate the tokens and record where each one starts */
+  for (count=0, pos=0; pos<slen;) {
+    /* overwrite the run of delimiters with '\0' */
+    run = strspn(buf+pos, delim);
+    memset(buf+pos, '\0', run);
+    pos += run;
+    if (pos == slen)
+      break;
+
+    /* a token starts at the first non-delimiter character */
+    tokens->list[count++] = buf+pos;
+
+    /* skip the token itself */
+    pos += strcspn(buf+pos, delim);
+  }
+}
+
+
+/************************************************************************
+* Frees the token pointer list and the string buffer held by a gk_Tokens_t
+*************************************************************************/
+void gk_freetokenslist(gk_Tokens_t *tokens)
+{
+  gk_free((void **)&tokens->list, &tokens->strbuf, LTERM);
+}
+
diff --git a/win32/adapt.c b/win32/adapt.c
new file mode 100644
index 0000000..546857c
--- /dev/null
+++ b/win32/adapt.c
@@ -0,0 +1,11 @@
+/*
+\file  win32/adapt.c
+\brief Implementation of Win32 adaptation of libc functions
+*/
+
+#include "adapt.h"
+
+pid_t getpid(void)
+{
+  return (pid_t) GetCurrentProcessId();  /* id of the calling process; pid_t is DWORD in adapt.h */
+}
diff --git a/win32/adapt.h b/win32/adapt.h
new file mode 100644
index 0000000..35e60ed
--- /dev/null
+++ b/win32/adapt.h
@@ -0,0 +1,14 @@
+/*
+\file  win32/adapt.h
+\brief Declaration of Win32 adaptation of POSIX functions and types
+*/
+#ifndef _WIN32_ADAPT_H_
+#define _WIN32_ADAPT_H_
+
+#include <windows.h>
+/* Process-id type for Win32 builds, defined here as the Win32 DWORD type */
+typedef DWORD pid_t;
+/* Returns the id of the calling process (implemented in win32/adapt.c) */
+pid_t getpid(void);
+
+#endif  /* _WIN32_ADAPT_H_ */