diff --git a/tools.c b/tools.c
index a14f799..b7281b0 100644
--- a/tools.c
+++ b/tools.c
@@ -1054,6 +1054,11 @@ bool cSafeFile::Close(void)
 // --- cUnbufferedFile -------------------------------------------------------
 
 #define USE_FADVISE
+#define USE_DIRECTIO
+
+// O_DIRECT can have various alignment restrictions, usually at most
+// the block size of the filesystem. 4096 bytes should be enough.
+#define ALIGN_DIO 4096
 
 //#define dfsyslog dsyslog // uncomment to turn on fadvise related logging
 #define dfsyslog(a...) do {} while (0)
@@ -1071,7 +1076,21 @@ cUnbufferedFile::~cUnbufferedFile()
 int cUnbufferedFile::Open(const char *FileName, int Flags, mode_t Mode)
 {
   Close();
-  fd = open(FileName, Flags, Mode);
+  // Close() is expected to have invalidated fd; set the starting state
+  // explicitly so the fallback below never depends on that.
+  fd = -1;
+  directio = 0;
+#ifdef USE_DIRECTIO
+  // Only files opened for writing are switched to O_DIRECT.
+  if (Flags&(O_WRONLY|O_RDWR)) {
+     fd = open(FileName, Flags|O_DIRECT, Mode);
+     directio = fd >= 0;
+     }
+#endif
+  // O_DIRECT was either not attempted or rejected: open normally.
+  if (fd < 0)
+     fd = open(FileName, Flags, Mode);
+  dsyslog("Using %s IO to access %s", directio?"DIRECT":"normal", FileName);
   curpos = 0;
 #ifdef USE_FADVISE
   lastpos = 0;
@@ -1101,7 +1120,8 @@ int cUnbufferedFile::Close(void)
      free(wbuf);
      }
 #ifdef USE_FADVISE
-  if (fd >= 0) {
+  // Needs a valid descriptor; skipped while the file is still in direct I/O mode.
+  if (fd >= 0 && !directio) {
      if (totwritten)    // if we wrote anything make sure the data has hit the disk before
         fdatasync(fd);  // calling fadvise, as this is our last chance to un-cache it.
      posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);
@@ -1243,7 +1263,30 @@ ssize_t cUnbufferedFile::Read(void *Data, size_t Size)
 ssize_t cUnbufferedFile::WriteBuf(const void *Data, size_t Size)
 {
   if (fd >=0) {
-     ssize_t bytesWritten = safe_write(fd, Data, Size);
+     ssize_t bytesWritten;
+
+#ifdef USE_DIRECTIO
+     if (directio) {
+        // O_DIRECT needs both the buffer address and the length aligned.
+        bool aligned = (((size_t)Data | Size) & (ALIGN_DIO-1)) == 0;
+        if (!aligned) {
+           // Typically the short final chunk written on close. Padding it
+           // would mean writing past the end of the caller's const buffer,
+           // so switch this descriptor back to buffered I/O instead. The
+           // file offset may be unaligned afterwards, hence direct I/O
+           // stays off until the file is opened again.
+           int flags = fcntl(fd, F_GETFL);
+           if (flags == -1 || fcntl(fd, F_SETFL, flags & ~O_DIRECT) == -1)
+              return -1;
+           directio = 0;
+           }
+        bytesWritten = safe_write(fd, Data, Size);
+        if (bytesWritten > 0) // never let an error code move the position
+           curpos += bytesWritten;
+        return bytesWritten;
+        }
+#endif
+     bytesWritten = safe_write(fd, Data, Size);
      //dsyslog("WRIT: fd:%3d %9zd .. %9zd SIZE: %6zd", fd, curpos, curpos+Size, Size);
 #ifdef USE_FADVISE
      if (bytesWritten > 0) {
@@ -1296,18 +1339,30 @@ ssize_t cUnbufferedFile::Write(const void *Data, size_t Size)
 {
   if (!wbuf) {
      wbuf_chunk = cutting?MEGABYTE(8):MEGABYTE(4);
-     wbuf = MALLOC(uchar,wbuf_chunk);
-     if (!wbuf)
+     // posix_memalign() takes a void **, so allocate through a temporary
+     // whatever pointer type wbuf is declared with. If there is no staging
+     // buffer, pass the data on; WriteBuf() copes with unaligned input.
+     void *aligned = NULL;
+     if (posix_memalign(&aligned, ALIGN_DIO, wbuf_chunk))
         return WriteBuf(Data, Size);
+     wbuf = (uchar *)aligned;
      wbuf_len = 0;
      }
-  if (Size <= wbuf_chunk-wbuf_len) {
-     memcpy(wbuf+wbuf_len, Data, Size);
-     wbuf_len += Size;
-     } else {
-     WriteBuf(wbuf, wbuf_len);
-     memcpy(wbuf, Data, Size);
-     wbuf_len = Size;
-     }
+  const uchar *src = (const uchar *)Data;
+  size_t left = Size;
+  // Flush only completely filled chunks so every buffered write keeps the
+  // O_DIRECT alignment; loop because Size may exceed several chunks.
+  while (left > (size_t)(wbuf_chunk-wbuf_len)) {
+     size_t room = wbuf_chunk-wbuf_len;
+     memcpy(wbuf+wbuf_len, src, room);
+     wbuf_len = wbuf_chunk;
+     if (WriteBuf(wbuf, wbuf_chunk) < 0)
+        return -1; // report the failure instead of dropping the chunk
+     wbuf_len = 0;
+     src += room;
+     left -= room;
+     }
+  memcpy(wbuf+wbuf_len, src, left);
+  wbuf_len += left;
   return Size;
 }
diff --git a/tools.h b/tools.h
index ce7283c..c2ff493 100644
--- a/tools.h
+++ b/tools.h
@@ -255,6 +255,7 @@ private:
   size_t written;
   size_t totwritten;
   int cutting;
+  int directio; // non-zero while fd is open with O_DIRECT
   size_t writebuffer;
   int FadviseDrop(off_t Offset, off_t Len);
   int FadviseRead(off_t Offset, off_t Len);