[vdr] [PATCH] cUnbufferedFile - another approach to readahead

Artur Skawina art_k at o2.pl
Sat Apr 29 12:29:10 CEST 2006


I suspected the readahead was being too aggressive, hence this patch -- a more self-tuning readahead plus general cleanups. I think the only bugfix in here is the one I just sent separately, but this code dates from the vdr-1.3.45 era, so I might have forgotten something. (Versions 1.3.46 and later kept segfaulting here; I found the cause only today -- a missing dependency in the streamdev plugin.)

Posting this just in case somebody has trouble with the fadvise code and wants to try something else.
Commenting out "#define USE_FADVISE_RA" should disable the manual readahead completely while still flushing written data out, which prevents an ongoing recording from eating up all free RAM, etc.

artur
-------------- next part --------------
--- vdr-1.3.49.org/cutter.c	2006-02-12 10:07:23.000000000 +0000
+++ vdr-1.3.49/cutter.c	2006-04-29 08:54:19.000000000 +0000
@@ -66,7 +66,8 @@ void cCuttingThread::Action(void)
      toFile = toFileName->Open();
      if (!fromFile || !toFile)
         return;
-     fromFile->SetReadAhead(MEGABYTE(20));
+     fromFile->CuttingSrc();
+     toFile->CuttingDst();
      int Index = Mark->position;
      Mark = fromMarks.Next(Mark);
      int FileSize = 0;
@@ -91,7 +92,7 @@ void cCuttingThread::Action(void)
            if (fromIndex->Get(Index++, &FileNumber, &FileOffset, &PictureType, &Length)) {
               if (FileNumber != CurrentFileNumber) {
                  fromFile = fromFileName->SetOffset(FileNumber, FileOffset);
-                 fromFile->SetReadAhead(MEGABYTE(20));
+                 fromFile->CuttingSrc();
                  CurrentFileNumber = FileNumber;
                  }
               if (fromFile) {
@@ -124,6 +125,7 @@ void cCuttingThread::Action(void)
                     error = "toFile 1";
                     break;
                     }
+                 toFile->CuttingDst();
                  FileSize = 0;
                  }
               LastIFrame = 0;
@@ -164,6 +166,7 @@ void cCuttingThread::Action(void)
                        error = "toFile 2";
                        break;
                        }
+                    toFile->CuttingDst();
                     FileSize = 0;
                     }
                  }
--- vdr-1.3.49.org/tools.c	2006-04-21 15:12:47.000000000 +0000
+++ vdr-1.3.49/tools.c	2006-04-29 08:54:19.000000000 +0000
@@ -1060,9 +1060,14 @@ bool cSafeFile::Close(void)
 
 // --- cUnbufferedFile -------------------------------------------------------
 
-#define USE_FADVISE
+#define USE_FADVISE_RA
+#define USE_FADVISE_DB
 
-#define WRITE_BUFFER KILOBYTE(800)
+//#define dfsyslog dsyslog                 // turn on fadvise related logging
+#define dfsyslog(a...) do {} while (0)
+
+#define FADVGRAN   KILOBYTE(4) // AKA fadvise-chunk-size; PAGE_SIZE or getpagesize(2) would also work.
+#define READCHUNK  MEGABYTE(8)
 
 cUnbufferedFile::cUnbufferedFile(void)
 {
@@ -1079,13 +1084,16 @@ int cUnbufferedFile::Open(const char *Fi
   Close();
   fd = open(FileName, Flags, Mode);
   curpos = 0;
-#ifdef USE_FADVISE
-  begin = lastpos = ahead = 0;
-  cachedstart = 0;
-  cachedend = 0;
-  readahead = KILOBYTE(128);
+#if defined(USE_FADVISE_RA) || defined(USE_FADVISE_DB)
+  lastpos = 0;
+  lastjump = 0;
+  cachedstart = cachedend = 0;
+  //readahead = READCHUNK;
+  readahead = KILOBYTE(768);
+  writebuffer = KILOBYTE(800);
   written = 0;
   totwritten = 0;
+  cutting = 0;
   if (fd >= 0)
      posix_fadvise(fd, 0, 0, POSIX_FADV_RANDOM); // we could use POSIX_FADV_SEQUENTIAL, but we do our own readahead, disabling the kernel one.
 #endif
@@ -1094,7 +1102,7 @@ int cUnbufferedFile::Open(const char *Fi
 
 int cUnbufferedFile::Close(void)
 {
-#ifdef USE_FADVISE
+#if defined(USE_FADVISE_DB)
   if (fd >= 0) {
      if (totwritten)    // if we wrote anything make sure the data has hit the disk before
         fdatasync(fd);  // calling fadvise, as this is our last chance to un-cache it.
@@ -1111,18 +1119,35 @@ int cUnbufferedFile::Close(void)
 // We try to handle the common cases such as PLAY->FF->PLAY, small
 // jumps, moving editing marks etc.
 
-#define FADVGRAN   KILOBYTE(4) // AKA fadvise-chunk-size; PAGE_SIZE or getpagesize(2) would also work.
-#define READCHUNK  MEGABYTE(8)
+void cUnbufferedFile::CuttingSrc(void)
+{
+  readahead = MEGABYTE(16);
+  cutting = 1;
+}
 
-void cUnbufferedFile::SetReadAhead(size_t ra)
+void cUnbufferedFile::CuttingDst(void)
 {
-  readahead = ra;
+  writebuffer = MEGABYTE(16);
+  cutting = 2;
 }
 
 int cUnbufferedFile::FadviseDrop(off_t Offset, off_t Len)
 {
-  // rounding up the window to make sure that not PAGE_SIZE-aligned data gets freed.
-  return posix_fadvise(fd, Offset - (FADVGRAN - 1), Len + (FADVGRAN - 1) * 2, POSIX_FADV_DONTNEED);
+  // Round up the window to make sure that not PAGE_SIZE-aligned data gets freed.
+  // Note that that also means calling this with Len==0 isn't special (unlike fadvise).
+  dfsyslog("DROP: fd:%3d %9zd .. %9zd SIZE: %6zd", fd, Offset, Offset+Len, Len);
+  off_t prewin = min(Offset ,(off_t)FADVGRAN - 1); // must not wrap below 0.
+  return posix_fadvise(fd, Offset - prewin, Len + prewin + (FADVGRAN - 1), POSIX_FADV_DONTNEED);
+}
+
+// Trigger background readahead on the specified range and add
+// it to the "cached" area so that we can drop the data later.
+int cUnbufferedFile::FadviseRead(off_t Offset, off_t Len)
+{
+  dfsyslog("WANT: fd:%3d %9zd .. %9zd SIZE: %6zd", fd, Offset, Offset+Len, Len);
+  cachedstart = min(cachedstart, Offset);
+  cachedend = max(cachedend, Offset+Len);
+  return posix_fadvise(fd, Offset, Len, POSIX_FADV_WILLNEED);
 }
 
 off_t cUnbufferedFile::Seek(off_t Offset, int Whence)
@@ -1136,54 +1161,78 @@ off_t cUnbufferedFile::Seek(off_t Offset
 ssize_t cUnbufferedFile::Read(void *Data, size_t Size)
 {
   if (fd >= 0) {
-#ifdef USE_FADVISE
+#ifdef USE_FADVISE_RA
      off_t jumped = curpos-lastpos; // nonzero means we're not at the last offset
-     if ((cachedstart < cachedend) && (curpos < cachedstart || curpos > cachedend)) {
+     
+     dfsyslog("READ: fd:%3d %9zd .. %9zd SIZE: %6zd jump: %9zd ra: %7zd", fd, curpos, curpos+Size, Size, jumped, readahead);
+     
+     if (curpos < cachedstart || curpos > cachedend) {
         // current position is outside the cached window -- invalidate it.
-        FadviseDrop(cachedstart, cachedend-cachedstart);
+        if (cachedstart != cachedend)
+           FadviseDrop(cachedstart, cachedend-cachedstart);
         cachedstart = curpos;
         cachedend = curpos;
         }
-     cachedstart = min(cachedstart, curpos);
 #endif
      ssize_t bytesRead = safe_read(fd, Data, Size);
-#ifdef USE_FADVISE
+#ifdef USE_FADVISE_RA
      if (bytesRead > 0) {
+        cachedstart = min(cachedstart, curpos);
         curpos += bytesRead;
-        cachedend = max(cachedend, curpos);
-
         // Read ahead:
-        // no jump? (allow small forward jump still inside readahead window).
-        if (jumped >= 0 && jumped <= (off_t)readahead) {
-           // Trigger the readahead IO, but only if we've used at least
-           // 1/2 of the previously requested area. This avoids calling
-           // fadvise() after every read() call.
-           if (ahead - curpos < (off_t)(readahead / 2)) {
-              posix_fadvise(fd, curpos, readahead, POSIX_FADV_WILLNEED);
-              ahead = curpos + readahead;
-              cachedend = max(cachedend, ahead);
-              }
-           if (readahead < Size * 32) { // automagically tune readahead size.
-              readahead = Size * 32;
+        // no jump or small forward jump still inside readahead window.
+        if (jumped >= 0 && curpos <= cachedend) {
+           // Trigger the readahead IO, but only if we've used at least some of the previously
+           // requested area. This avoids calling fadvise() after every read() call.
+           size_t cachedsize = cachedend - curpos;
+           size_t ra = cachedsize + Size*1 + (size_t)jumped*1;
+           if (cutting)
+              ra += KILOBYTE(128);
+           ra = min(readahead, ra);
+           // Start I/O if we A) used some of the data or B) can read sufficiently large new chunk.
+           // (A) is important when starting w/ a small readahead.
+           if (cachedsize < (ra-ra/4) || cachedsize+KILOBYTE(128) <= ra)
+              FadviseRead(curpos, ra);
+           }
+        else if (jumped >= 0) {    // either large forward jump, or FF (jumps by ~4xSize)
+              FadviseRead(curpos, ((size_t)jumped < Size*8)?(jumped+Size)*2:Size*2);
+        }
+        else /*if (jumped < 0)*/ { // backward jump:
+           // We don't want any readahead, otherwise e.g. fast-rewind gets in trouble.
+#if 0
+           // But we'll do some read behind for sequential short backward jumps.
+           size_t rbsize = -jumped * 16;
+           if (lastjump < 0 && (size_t)-jumped < Size * 8 && (size_t)((curpos-Size) - cachedstart ) < rbsize) {
+              // current position has moved back enough, grow tail window.
+              off_t start = max((off_t)0, (off_t)((curpos - Size) - rbsize));
+              FadviseRead(start, rbsize);
               }
+#endif
+           // We could reduce readahead window here. But this would lead to already
+           // prefetched data being thrown out by the code below; not exactly ideal
+           // if this jump was only caused by play mode transition etc.
+           //readahead = Size * 8;
            }
-        else
-           ahead = curpos; // jumped -> we really don't want any readahead, otherwise e.g. fast-rewind gets in trouble.
+        cachedend = max(cachedend, curpos);
         }
 
      if (cachedstart < cachedend) {
-        if (curpos - cachedstart > READCHUNK * 2) {
+        off_t maxtail = cutting ? KILOBYTE(32) : READCHUNK;
+        off_t maxhead = max(readahead, (size_t)READCHUNK);
+        if (jumped >= 0 && curpos - cachedstart >= maxtail * 2) {
            // current position has moved forward enough, shrink tail window.
-           FadviseDrop(cachedstart, curpos - READCHUNK - cachedstart);
-           cachedstart = curpos - READCHUNK;
+           FadviseDrop(cachedstart, (curpos - maxtail) - cachedstart);
+           cachedstart = curpos - maxtail;
            }
-        else if (cachedend > ahead && cachedend - curpos > READCHUNK * 2) {
+        else if (jumped < 0 && lastjump < 0 && cachedend - curpos >= maxhead * 2) {
            // current position has moved back enough, shrink head window.
-           FadviseDrop(curpos + READCHUNK, cachedend - curpos + READCHUNK);
-           cachedend = curpos + READCHUNK;
+           // (a large readahead value may prevent this)
+           FadviseDrop(curpos + maxhead, cachedend - (curpos + maxhead));
+           cachedend = curpos + maxhead;
            }
         }
      lastpos = curpos;
+     lastjump = jumped;
 #endif
      return bytesRead;
      }
@@ -1194,26 +1243,27 @@ ssize_t cUnbufferedFile::Write(const voi
 {
   if (fd >=0) {
      ssize_t bytesWritten = safe_write(fd, Data, Size);
-#ifdef USE_FADVISE
+     //dsyslog("WRIT: fd:%3d %9zd .. %9zd SIZE: %6zd", fd, curpos, curpos+Size, Size);
+#ifdef USE_FADVISE_DB
      if (bytesWritten > 0) {
-        begin = min(begin, curpos);
+        cachedstart = min(cachedstart, curpos);
         curpos += bytesWritten;
         written += bytesWritten;
-        lastpos = max(lastpos, curpos);
-        if (written > WRITE_BUFFER) {
-           if (lastpos > begin) {
+        cachedend = max(cachedend, curpos);
+        if (written > writebuffer) {
+           if (cachedend > cachedstart) {
               // Now do three things:
-              // 1) Start writeback of begin..lastpos range
+              // 1) Start writeback of cachedstart..cachedend range
               // 2) Drop the already written range (by the previous fadvise call)
               // 3) Handle nonpagealigned data.
-              //    This is why we double the WRITE_BUFFER; the first time around the
+              //    This is why we double the writebuffer; the first time around the
               //    last (partial) page might be skipped, writeback will start only after
               //    second call; the third call will still include this page and finally
               //    drop it from cache.
-              off_t headdrop = min(begin, WRITE_BUFFER * 2L);
-              posix_fadvise(fd, begin - headdrop, lastpos - begin + headdrop, POSIX_FADV_DONTNEED);
+              off_t headdrop = min(cachedstart, (off_t)writebuffer * 2);
+              posix_fadvise(fd, cachedstart - headdrop, cachedend - cachedstart + headdrop, POSIX_FADV_DONTNEED);
               }
-           begin = lastpos = curpos;
+           cachedstart = cachedend = curpos;
            totwritten += written;
            written = 0;
            // The above fadvise() works when writing slowly (recording), but could
@@ -1223,7 +1273,7 @@ ssize_t cUnbufferedFile::Write(const voi
            // So we do another round of flushing, just like above, but at larger
            // intervals -- this should catch any pages that couldn't be released
            // earlier.
-           if (totwritten > MEGABYTE(32)) {
+           if (totwritten > MEGABYTE(32) + writebuffer ) {
               // It seems in some setups, fadvise() does not trigger any I/O and
               // a fdatasync() call would be required do all the work (reiserfs with some
               // kind of write gathering enabled), but the syncs cause (io) load..
--- vdr-1.3.49.org/tools.h	2006-04-16 10:40:45.000000000 +0000
+++ vdr-1.3.49/tools.h	2006-04-29 08:54:19.000000000 +0000
@@ -245,19 +245,22 @@ private:
   off_t curpos;
   off_t cachedstart;
   off_t cachedend;
-  off_t begin;
   off_t lastpos;
-  off_t ahead;
+  off_t lastjump;
   size_t readahead;
   size_t written;
   size_t totwritten;
+  int cutting;
+  size_t writebuffer;
   int FadviseDrop(off_t Offset, off_t Len);
+  int FadviseRead(off_t Offset, off_t Len);
 public:
   cUnbufferedFile(void);
   ~cUnbufferedFile();
   int Open(const char *FileName, int Flags, mode_t Mode = DEFFILEMODE);
   int Close(void);
-  void SetReadAhead(size_t ra);
+  void CuttingSrc(void);
+  void CuttingDst(void);
   off_t Seek(off_t Offset, int Whence);
   ssize_t Read(void *Data, size_t Size);
   ssize_t Write(const void *Data, size_t Size);


More information about the vdr mailing list