Browse Source

Merge custom memcpy

Suren A. Chilingaryan 11 years ago
parent
commit
382ba1a163
5 changed files with 424 additions and 6 deletions
  1. 6 2
      CMakeLists.txt
  2. 1 0
      config.h.in
  3. 10 4
      fastwriter.c
  4. 344 0
      memcpy.c
  5. 63 0
      memcpy.h

+ 6 - 2
CMakeLists.txt

@@ -7,7 +7,7 @@ cmake_minimum_required(VERSION 2.8)
 
 set(DISABLE_AIO TRUE CACHE BOOL "Use kernel AIO writer")
 set(DISABLE_XFS_REALTIME FALSE CACHE BOOL "Disable support of RealTime XFS partition")
-
+set(USE_CUSTOM_MEMCPY FALSE CACHE BOOL "Use custom memcpy routine instead of stanadrd")
 
 include(CheckIncludeFiles)
 check_include_files("linux/falloc.h" HAVE_LINUX_FALLOC_H)
@@ -29,6 +29,11 @@ add_definitions("-fPIC --std=c99 -Wall -O2 -pthread")
 set(HEADERS fastwriter.h sysinfo.h default.h private.h)
 set(SOURCES fastwriter.c sysinfo.c default.c)
 
+if (USE_CUSTOM_MEMCPY)
+    set(HEADERS ${HEADERS} memcpy.h)
+    set(SOURCES ${SOURCES} memcpy.c)
+endif (USE_CUSTOM_MEMCPY)
+
 if (NOT DISABLE_AIO)
     check_include_files("libaio.h" HAVE_LIBAIO_H)
     if (NOT HAVE_LIBAIO_H)
@@ -48,7 +53,6 @@ if (NOT DISABLE_AIO)
     target_link_libraries(fastwriter aio)
 endif (NOT DISABLE_AIO)
 
-
 set(TARNAME "fastwriter")
 set(PACKAGE_VERSION ${FASTWRITER_VERSION})
 set(PACKAGE_NAME "${TARNAME}")

+ 1 - 0
config.h.in

@@ -1,3 +1,4 @@
 #cmakedefine HAVE_LINUX_FALLOC_H
 #cmakedefine DISABLE_XFS_REALTIME
 #cmakedefine DISABLE_AIO
+#cmakedefine USE_CUSTOM_MEMCPY

+ 10 - 4
fastwriter.c

@@ -15,11 +15,17 @@
 
 #include <fcntl.h>
 
-
 #include "private.h"
 #include "default.h"
 #include "sysinfo.h"
 
+#ifdef USE_CUSTOM_MEMCPY
+# include "memcpy.h"
+#else /* USE_CUSTOM_MEMCPY */
+# define fast_memcpy memcpy
+#endif /* USE_CUSTOM_MEMCPY */
+
+
 fastwriter_t *fastwriter_init(const char *fs, fastwriter_flags_t flags) {
     fastwriter_t *ctx;
     
@@ -275,11 +281,11 @@ int fastwriter_push(fastwriter_t *ctx, size_t size, const void *data) {
     if (part1 < size) {
 	    // tail < pos (we have checked for free space)
 	end = size - part1;
-	memcpy(ctx->buffer + ctx->pos, data, part1);
-	memcpy(ctx->buffer, data + part1, end);
+	fast_memcpy(ctx->buffer + ctx->pos, data, part1);
+	fast_memcpy(ctx->buffer, data + part1, end);
 	ctx->pos = end;
     } else {
-	memcpy(ctx->buffer + ctx->pos, data, size);
+	fast_memcpy(ctx->buffer + ctx->pos, data, size);
 	ctx->pos += size;
 	
 	if (ctx->pos == ctx->size) ctx->pos = 0;

+ 344 - 0
memcpy.c

@@ -0,0 +1,344 @@
+/********************************************************************
+ ** File:     memcpy.c
+ **
+ ** Copyright (C) 1999-2010 Daniel Vik
+ ** 
+ ** This software is provided 'as-is', without any express or implied
+ ** warranty. In no event will the authors be held liable for any
+ ** damages arising from the use of this software.
+ ** Permission is granted to anyone to use this software for any
+ ** purpose, including commercial applications, and to alter it and
+ ** redistribute it freely, subject to the following restrictions:
+ ** 
+ ** 1. The origin of this software must not be misrepresented; you
+ **    must not claim that you wrote the original software. If you
+ **    use this software in a product, an acknowledgment in the
+ **    use this software in a product, an acknowledgment in the
+ **    product documentation would be appreciated but is not
+ **    required.
+ ** 
+ ** 2. Altered source versions must be plainly marked as such, and
+ **    must not be misrepresented as being the original software.
+ ** 
+ ** 3. This notice may not be removed or altered from any source
+ **    distribution.
+ ** 
+ ** 
+ ** Description: Implementation of the standard library function memcpy.
+ **             This implementation of memcpy() is ANSI-C89 compatible.
+ **
+ **             The following configuration options can be set:
+ **
+ **           LITTLE_ENDIAN   - Uses processor with little endian
+ **                             addressing. Default is big endian.
+ **
+ **           PRE_INC_PTRS    - Use pre increment of pointers.
+ **                             Default is post increment of
+ **                             pointers.
+ **
+ **           INDEXED_COPY    - Copying data using array indexing.
+ **                             Using this option, disables the
+ **                             PRE_INC_PTRS option.
+ **
+ **           MEMCPY_64BIT    - Compiles memcpy for 64 bit 
+ **                             architectures
+ **
+ **
+ ** Best Settings:
+ **
+ ** Intel x86:  LITTLE_ENDIAN and INDEXED_COPY
+ **
+ *******************************************************************/
+
+
+
+/********************************************************************
+ ** Configuration definitions.
+ *******************************************************************/
+
+#define LITTLE_ENDIAN
+#define INDEXED_COPY
+
+
+/********************************************************************
+ ** Includes for size_t definition
+ *******************************************************************/
+
+#include <stddef.h>
+
+
+/********************************************************************
+ ** Typedefs
+ *******************************************************************/
+
+typedef unsigned char       UInt8;
+typedef unsigned short      UInt16;
+typedef unsigned int        UInt32;
+#ifdef _WIN32
+typedef unsigned __int64    UInt64;
+#else
+typedef unsigned long long  UInt64;
+#endif
+
+#ifdef MEMCPY_64BIT
+typedef UInt64              UIntN;
+#define TYPE_WIDTH          8L
+#else
+typedef UInt32              UIntN;
+#define TYPE_WIDTH          4L
+#endif
+
+
+/********************************************************************
+ ** Remove definitions when INDEXED_COPY is defined.
+ *******************************************************************/
+
+#if defined (INDEXED_COPY)
+#if defined (PRE_INC_PTRS)
+#undef PRE_INC_PTRS
+#endif /*PRE_INC_PTRS*/
+#endif /*INDEXED_COPY*/
+
+
+
+/********************************************************************
+ ** Definitions for pre and post increment of pointers.
+ *******************************************************************/
+
+#if defined (PRE_INC_PTRS)
+
+#define START_VAL(x)            (x)--
+#define INC_VAL(x)              *++(x)
+#define CAST_TO_U8(p, o)        ((UInt8*)p + o + TYPE_WIDTH)
+#define WHILE_DEST_BREAK        (TYPE_WIDTH - 1)
+#define PRE_LOOP_ADJUST         - (TYPE_WIDTH - 1)
+#define PRE_SWITCH_ADJUST       + 1
+
+#else /*PRE_INC_PTRS*/
+
+#define START_VAL(x)
+#define INC_VAL(x)              *(x)++
+#define CAST_TO_U8(p, o)        ((UInt8*)p + o)
+#define WHILE_DEST_BREAK        0
+#define PRE_LOOP_ADJUST
+#define PRE_SWITCH_ADJUST
+
+#endif /*PRE_INC_PTRS*/
+
+
+
+/********************************************************************
+ ** Definitions for endians
+ *******************************************************************/
+
+#if defined (LITTLE_ENDIAN)
+
+#define SHL >>
+#define SHR <<
+
+#else /* LITTLE_ENDIAN */
+
+#define SHL <<
+#define SHR >>
+
+#endif /* LITTLE_ENDIAN */
+
+
+
+/********************************************************************
+ ** Macros for copying words of  different alignment.
+ ** Uses incremening pointers.
+ *******************************************************************/
+
+#define CP_INCR() {                         \
+    INC_VAL(dstN) = INC_VAL(srcN);          \
+}
+
+#define CP_INCR_SH(shl, shr) {              \
+    dstWord   = srcWord SHL shl;            \
+    srcWord   = INC_VAL(srcN);              \
+    dstWord  |= srcWord SHR shr;            \
+    INC_VAL(dstN) = dstWord;                \
+}
+
+
+
+/********************************************************************
+ ** Macros for copying words of  different alignment.
+ ** Uses array indexes.
+ *******************************************************************/
+
+#define CP_INDEX(idx) {                     \
+    dstN[idx] = srcN[idx];                  \
+}
+
+#define CP_INDEX_SH(x, shl, shr) {          \
+    dstWord   = srcWord SHL shl;            \
+    srcWord   = srcN[x];                    \
+    dstWord  |= srcWord SHR shr;            \
+    dstN[x]  = dstWord;                     \
+}
+
+
+
+/********************************************************************
+ ** Macros for copying words of different alignment.
+ ** Uses incremening pointers or array indexes depending on
+ ** configuration.
+ *******************************************************************/
+
+#if defined (INDEXED_COPY)
+
+#define CP(idx)               CP_INDEX(idx)
+#define CP_SH(idx, shl, shr)  CP_INDEX_SH(idx, shl, shr)
+
+#define INC_INDEX(p, o)       ((p) += (o))
+
+#else /* INDEXED_COPY */
+
+#define CP(idx)               CP_INCR()
+#define CP_SH(idx, shl, shr)  CP_INCR_SH(shl, shr)
+
+#define INC_INDEX(p, o)
+
+#endif /* INDEXED_COPY */
+
+
+#define COPY_REMAINING(count) {                                     \
+    START_VAL(dst8);                                                \
+    START_VAL(src8);                                                \
+                                                                    \
+    switch (count) {                                                \
+    case 7: INC_VAL(dst8) = INC_VAL(src8);                          \
+    case 6: INC_VAL(dst8) = INC_VAL(src8);                          \
+    case 5: INC_VAL(dst8) = INC_VAL(src8);                          \
+    case 4: INC_VAL(dst8) = INC_VAL(src8);                          \
+    case 3: INC_VAL(dst8) = INC_VAL(src8);                          \
+    case 2: INC_VAL(dst8) = INC_VAL(src8);                          \
+    case 1: INC_VAL(dst8) = INC_VAL(src8);                          \
+    case 0:                                                         \
+    default: break;                                                 \
+    }                                                               \
+}
+
+#define COPY_NO_SHIFT() {                                           \
+    UIntN* dstN = (UIntN*)(dst8 PRE_LOOP_ADJUST);                   \
+    UIntN* srcN = (UIntN*)(src8 PRE_LOOP_ADJUST);                   \
+    size_t length = count / TYPE_WIDTH;                             \
+                                                                    \
+    while (length & 7) {                                            \
+        CP_INCR();                                                  \
+        length--;                                                   \
+    }                                                               \
+                                                                    \
+    length /= 8;                                                    \
+                                                                    \
+    while (length--) {                                              \
+        CP(0);                                                      \
+        CP(1);                                                      \
+        CP(2);                                                      \
+        CP(3);                                                      \
+        CP(4);                                                      \
+        CP(5);                                                      \
+        CP(6);                                                      \
+        CP(7);                                                      \
+                                                                    \
+        INC_INDEX(dstN, 8);                                         \
+        INC_INDEX(srcN, 8);                                         \
+    }                                                               \
+                                                                    \
+    src8 = CAST_TO_U8(srcN, 0);                                     \
+    dst8 = CAST_TO_U8(dstN, 0);                                     \
+                                                                    \
+    COPY_REMAINING(count & (TYPE_WIDTH - 1));                       \
+                                                                    \
+    return dest;                                                    \
+}
+
+
+
+#define COPY_SHIFT(shift) {                                         \
+    UIntN* dstN  = (UIntN*)((((UIntN)dst8) PRE_LOOP_ADJUST) &       \
+                             ~(TYPE_WIDTH - 1));                    \
+    UIntN* srcN  = (UIntN*)((((UIntN)src8) PRE_LOOP_ADJUST) &       \
+                             ~(TYPE_WIDTH - 1));                    \
+    size_t length  = count / TYPE_WIDTH;                            \
+    UIntN srcWord = INC_VAL(srcN);                                  \
+    UIntN dstWord;                                                  \
+                                                                    \
+    while (length & 7) {                                            \
+        CP_INCR_SH(8 * shift, 8 * (TYPE_WIDTH - shift));            \
+        length--;                                                   \
+    }                                                               \
+                                                                    \
+    length /= 8;                                                    \
+                                                                    \
+    while (length--) {                                              \
+        CP_SH(0, 8 * shift, 8 * (TYPE_WIDTH - shift));              \
+        CP_SH(1, 8 * shift, 8 * (TYPE_WIDTH - shift));              \
+        CP_SH(2, 8 * shift, 8 * (TYPE_WIDTH - shift));              \
+        CP_SH(3, 8 * shift, 8 * (TYPE_WIDTH - shift));              \
+        CP_SH(4, 8 * shift, 8 * (TYPE_WIDTH - shift));              \
+        CP_SH(5, 8 * shift, 8 * (TYPE_WIDTH - shift));              \
+        CP_SH(6, 8 * shift, 8 * (TYPE_WIDTH - shift));              \
+        CP_SH(7, 8 * shift, 8 * (TYPE_WIDTH - shift));              \
+                                                                    \
+        INC_INDEX(dstN, 8);                                         \
+        INC_INDEX(srcN, 8);                                         \
+    }                                                               \
+                                                                    \
+    src8 = CAST_TO_U8(srcN, (shift - TYPE_WIDTH));                  \
+    dst8 = CAST_TO_U8(dstN, 0);                                     \
+                                                                    \
+    COPY_REMAINING(count & (TYPE_WIDTH - 1));                       \
+                                                                    \
+    return dest;                                                    \
+}
+
+
+/********************************************************************
+ **
+ ** void *memcpy(void *dest, const void *src, size_t count)
+ **
+ ** Args:     dest        - pointer to destination buffer
+ **           src         - pointer to source buffer
+ **           count       - number of bytes to copy
+ **
+ ** Return:   A pointer to destination buffer
+ **
+ ** Purpose:  Copies count bytes from src to dest. 
+ **           No overlap check is performed.
+ **
+ *******************************************************************/
+
+void *fast_memcpy(void *dest, const void *src, size_t count) 
+{
+    UInt8* dst8 = (UInt8*)dest;
+    UInt8* src8 = (UInt8*)src;
+
+    if (count < 8) {
+        COPY_REMAINING(count);
+        return dest;
+    }
+
+    START_VAL(dst8);
+    START_VAL(src8);
+
+    while (((UIntN)dst8 & (TYPE_WIDTH - 1)) != WHILE_DEST_BREAK) {
+        INC_VAL(dst8) = INC_VAL(src8);
+        count--;
+    }
+
+    switch ((((UIntN)src8) PRE_SWITCH_ADJUST) & (TYPE_WIDTH - 1)) {
+    case 0: COPY_NO_SHIFT(); break;
+    case 1: COPY_SHIFT(1);   break;
+    case 2: COPY_SHIFT(2);   break;
+    case 3: COPY_SHIFT(3);   break;
+#if TYPE_WIDTH > 4
+    case 4: COPY_SHIFT(4);   break;
+    case 5: COPY_SHIFT(5);   break;
+    case 6: COPY_SHIFT(6);   break;
+    case 7: COPY_SHIFT(7);   break;
+#endif
+    }
+}

+ 63 - 0
memcpy.h

@@ -0,0 +1,63 @@
+/********************************************************************
+ ** File:     memcpy.h
+ **
+ ** Copyright (C) 2005 Daniel Vik
+ ** 
+ ** This software is provided 'as-is', without any express or implied
+ ** warranty. In no event will the authors be held liable for any
+ ** damages arising from the use of this software.
+ ** Permission is granted to anyone to use this software for any
+ ** purpose, including commercial applications, and to alter it and
+ ** redistribute it freely, subject to the following restrictions:
+ ** 
+ ** 1. The origin of this software must not be misrepresented; you
+ **    must not claim that you wrote the original software. If you
+ **    use this software in a product, an acknowledgment in the
+ **    use this software in a product, an acknowledgment in the
+ **    product documentation would be appreciated but is not
+ **    required.
+ ** 
+ ** 2. Altered source versions must be plainly marked as such, and
+ **    must not be misrepresented as being the original software.
+ ** 
+ ** 3. This notice may not be removed or altered from any source
+ **    distribution.
+ ** 
+ ** 
+ ** Description: Implementation of the standard library function memcpy.
+ **             This implementation of memcpy() is ANSI-C89 compatible.
+ **
+ *******************************************************************/
+
+
+/********************************************************************
+ ** Includes for size_t definition
+ *******************************************************************/
+
+#include <stddef.h>
+
+
+/********************************************************************
+ **
+ ** void *memcpy(void *dest, const void *src, size_t count)
+ **
+ ** Args:     dest    - pointer to destination buffer
+ **           src     - pointer to source buffer
+ **           count   - number of bytes to copy
+ **
+ ** Return:   A pointer to destination buffer
+ **
+ ** Purpose:  Copies count bytes from src to dest. No overlap check
+ **           is performed.
+ **
+ *******************************************************************/
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void *fast_memcpy(void *dest, const void *src, size_t count);
+
+#ifdef __cplusplus
+}
+#endif