[PATCH 0/4][SRU][BIONIC] block: fix silent corruption in Linux kernel 4.15

classic Classic list List threaded Threaded
8 messages Options
Reply | Threaded
Open this post in threaded view
|

[PATCH 0/4][SRU][BIONIC] block: fix silent corruption in Linux kernel 4.15

Colin Ian King-2
From: Colin Ian King <[hidden email]>

Buglink: https://bugs.launchpad.net/bugs/1796542

== SRU Justification ==

A silent data corruption was introduced in v4.10-rc1 with commit
72ecad22d9f198aafee64218512e02ffa7818671 and was fixed in v4.18-rc7
with commit 17d51b10d7773e4618bcac64648f30f12d4078fb. It affects
users of O_DIRECT, in our case a KVM virtual machine with drives
which use qemu's "cache=none" option.

== Fix ==

Upstream commits:

0aa69fd32a5f766e997ca8ab4723c5a1146efa8b
  block: add a lower-level bio_add_page interface

b403ea2404889e1227812fa9657667a1deb9c694
  block: bio_iov_iter_get_pages: fix size of last iovec

9362dd1109f87a9d0a798fbc890cb339c171ed35
  blkdev: __blkdev_direct_IO_simple: fix leak in error case

17d51b10d7773e4618bcac64648f30f12d4078fb
  block: bio_iov_iter_get_pages: pin more pages for multi-segment IOs

The first 3 patches are required for a clean application of the final
patch that actually addresses the problem with a fix to this known
issue.

== Regression Potential ==

This touches the block layer, so there is a potential risk of data
corruption. The fixes have been in the upstream kernel for several
weeks and so far I see no subsequent fixes required.

== Test Case ==

Build the program listed below [1]
kudos to Jan Kara, and run with:

dd if=/dev/zero of=loop.img bs=1M count=2048
sudo losetup /dev/loop0 loop.img

./blkdev-dio-test /dev/loop0 0 &
./blkdev-dio-test /dev/loop0 2048 &

Without the fix, one sees lost writes fairly soon.  With the fix, this
runs without any lost write messages.

blkdev-dio-test.c:

#define _GNU_SOURCE
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <string.h>
#include <stdlib.h>
#include <sys/uio.h>

#define PAGE_SIZE 4096
#define SECT_SIZE 512
#define BUF_OFF (2*SECT_SIZE)

/*
 * Reproducer for lost O_DIRECT writes on a block device.
 *
 * Usage: blkdev-dio-test <device> <byte-offset>
 *
 * In an endless loop, stores a sequence number at the start of two
 * SECT_SIZE-sized regions of a page-aligned buffer (at offsets 0 and
 * BUF_OFF), writes both regions with a single two-segment pwritev() at
 * <byte-offset>, reads PAGE_SIZE bytes back from the same offset and checks
 * that the first two sectors read back both carry the sequence number.
 *
 * The loop only terminates on an I/O error, a short transfer or a detected
 * lost write, so every exit path returns 1.
 */
int main(int argc, char **argv)
{
        int fd;
        ssize_t ret;
        char *buf = NULL;
        char *end;
        long long parsed;
        loff_t off;
        struct iovec iov[2];
        unsigned int seq = 0;

        /* Both the device path and the offset are mandatory. */
        if (argc < 3) {
                fprintf(stderr, "Usage: %s <device> <offset>\n",
                        argc > 0 ? argv[0] : "blkdev-dio-test");
                return 1;
        }

        /* Reject non-numeric or negative offsets instead of treating them as 0. */
        parsed = strtoll(argv[2], &end, 10);
        if (end == argv[2] || *end != '\0' || parsed < 0) {
                fprintf(stderr, "Invalid offset: %s\n", argv[2]);
                return 1;
        }
        off = parsed;

        fd = open(argv[1], O_RDWR | O_DIRECT);
        if (fd < 0) {
                perror("open");
                return 1;
        }

        buf = aligned_alloc(PAGE_SIZE, PAGE_SIZE);
        if (!buf) {
                perror("aligned_alloc");
                goto out;
        }
        memset(buf, 0, PAGE_SIZE);

        /* Two separate sector-sized segments taken from the same page. */
        iov[0].iov_base = buf;
        iov[0].iov_len = SECT_SIZE;
        iov[1].iov_base = buf + BUF_OFF;
        iov[1].iov_len = SECT_SIZE;

        for (;;) {
                unsigned int first, second;

                *(unsigned int *)buf = seq;
                *(unsigned int *)(buf + BUF_OFF) = seq;

                ret = pwritev(fd, iov, 2, off);
                if (ret < 0) {
                        perror("pwritev");
                        goto out;
                }
                if (ret != 2*SECT_SIZE) {
                        fprintf(stderr, "Short pwritev: %zd\n", ret);
                        goto out;
                }

                ret = pread(fd, buf, PAGE_SIZE, off);
                if (ret < 0) {
                        perror("pread");
                        goto out;
                }
                if (ret != PAGE_SIZE) {
                        fprintf(stderr, "Short read: %zd\n", ret);
                        goto out;
                }

                /*
                 * The two segments were written back to back, so on read
                 * the second one is found at SECT_SIZE, not at BUF_OFF.
                 */
                first = *(unsigned int *)buf;
                second = *(unsigned int *)(buf + SECT_SIZE);
                if (first != seq || second != seq) {
                        printf("Lost write %u: %u %u\n", seq, first, second);
                        goto out;
                }
                seq++;
        }

out:
        free(buf);
        close(fd);
        return 1;
}

References:
[1] https://www.spinics.net/lists/linux-block/msg28507.html

---

Christoph Hellwig (1):
  block: add a lower-level bio_add_page interface

Martin Wilck (3):
  block: bio_iov_iter_get_pages: fix size of last iovec
  blkdev: __blkdev_direct_IO_simple: fix leak in error case
  block: bio_iov_iter_get_pages: pin more pages for multi-segment IOs

 block/bio.c         | 149 ++++++++++++++++++++++++++++++--------------
 fs/block_dev.c      |   9 +--
 include/linux/bio.h |   9 +++
 3 files changed, 117 insertions(+), 50 deletions(-)

--
2.17.1


--
kernel-team mailing list
[hidden email]
https://lists.ubuntu.com/mailman/listinfo/kernel-team
Reply | Threaded
Open this post in threaded view
|

[PATCH 1/4][SRU][BIONIC] block: add a lower-level bio_add_page interface

Colin Ian King-2
From: Christoph Hellwig <[hidden email]>

Buglink: https://bugs.launchpad.net/bugs/1796542

For the upcoming removal of buffer heads in XFS we need to keep track of
the number of outstanding writeback requests per page.  For this we need
to know if bio_add_page merged a region with the previous bvec or not.
Instead of adding additional arguments this refactors bio_add_page to
be implemented using three lower level helpers which users like XFS can
use directly if they care about the merge decisions.

Signed-off-by: Christoph Hellwig <[hidden email]>
Reviewed-by: Jens Axboe <[hidden email]>
Reviewed-by: Ming Lei <[hidden email]>
Reviewed-by: Darrick J. Wong <[hidden email]>
Signed-off-by: Darrick J. Wong <[hidden email]>
(cherry picked from commit 0aa69fd32a5f766e997ca8ab4723c5a1146efa8b)
Signed-off-by: Colin Ian King <[hidden email]>
---
 block/bio.c         | 96 +++++++++++++++++++++++++++++----------------
 include/linux/bio.h |  9 +++++
 2 files changed, 72 insertions(+), 33 deletions(-)

diff --git a/block/bio.c b/block/bio.c
index 4b48f8eefc4c..2636d15af979 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -773,7 +773,7 @@ int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page
  return 0;
  }
 
- if (bio->bi_vcnt >= bio->bi_max_vecs)
+ if (bio_full(bio))
  return 0;
 
  /*
@@ -821,52 +821,82 @@ int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page
 EXPORT_SYMBOL(bio_add_pc_page);
 
 /**
- * bio_add_page - attempt to add page to bio
- * @bio: destination bio
- * @page: page to add
- * @len: vec entry length
- * @offset: vec entry offset
+ * __bio_try_merge_page - try appending data to an existing bvec.
+ * @bio: destination bio
+ * @page: page to add
+ * @len: length of the data to add
+ * @off: offset of the data in @page
  *
- * Attempt to add a page to the bio_vec maplist. This will only fail
- * if either bio->bi_vcnt == bio->bi_max_vecs or it's a cloned bio.
+ * Try to add the data at @page + @off to the last bvec of @bio.  This is a
+ * a useful optimisation for file systems with a block size smaller than the
+ * page size.
+ *
+ * Return %true on success or %false on failure.
  */
-int bio_add_page(struct bio *bio, struct page *page,
- unsigned int len, unsigned int offset)
+bool __bio_try_merge_page(struct bio *bio, struct page *page,
+ unsigned int len, unsigned int off)
 {
- struct bio_vec *bv;
-
- /*
- * cloned bio must not modify vec list
- */
  if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
- return 0;
+ return false;
 
- /*
- * For filesystems with a blocksize smaller than the pagesize
- * we will often be called with the same page as last time and
- * a consecutive offset.  Optimize this special case.
- */
  if (bio->bi_vcnt > 0) {
- bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
+ struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
 
- if (page == bv->bv_page &&
-    offset == bv->bv_offset + bv->bv_len) {
+ if (page == bv->bv_page && off == bv->bv_offset + bv->bv_len) {
  bv->bv_len += len;
- goto done;
+ bio->bi_iter.bi_size += len;
+ return true;
  }
  }
+ return false;
+}
+EXPORT_SYMBOL_GPL(__bio_try_merge_page);
 
- if (bio->bi_vcnt >= bio->bi_max_vecs)
- return 0;
+/**
+ * __bio_add_page - add page to a bio in a new segment
+ * @bio: destination bio
+ * @page: page to add
+ * @len: length of the data to add
+ * @off: offset of the data in @page
+ *
+ * Add the data at @page + @off to @bio as a new bvec.  The caller must ensure
+ * that @bio has space for another bvec.
+ */
+void __bio_add_page(struct bio *bio, struct page *page,
+ unsigned int len, unsigned int off)
+{
+ struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt];
 
- bv = &bio->bi_io_vec[bio->bi_vcnt];
- bv->bv_page = page;
- bv->bv_len = len;
- bv->bv_offset = offset;
+ WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED));
+ WARN_ON_ONCE(bio_full(bio));
+
+ bv->bv_page = page;
+ bv->bv_offset = off;
+ bv->bv_len = len;
 
- bio->bi_vcnt++;
-done:
  bio->bi_iter.bi_size += len;
+ bio->bi_vcnt++;
+}
+EXPORT_SYMBOL_GPL(__bio_add_page);
+
+/**
+ * bio_add_page - attempt to add page to bio
+ * @bio: destination bio
+ * @page: page to add
+ * @len: vec entry length
+ * @offset: vec entry offset
+ *
+ * Attempt to add a page to the bio_vec maplist. This will only fail
+ * if either bio->bi_vcnt == bio->bi_max_vecs or it's a cloned bio.
+ */
+int bio_add_page(struct bio *bio, struct page *page,
+ unsigned int len, unsigned int offset)
+{
+ if (!__bio_try_merge_page(bio, page, len, offset)) {
+ if (bio_full(bio))
+ return 0;
+ __bio_add_page(bio, page, len, offset);
+ }
  return len;
 }
 EXPORT_SYMBOL(bio_add_page);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index a98c6ac575cf..3440870712d4 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -123,6 +123,11 @@ static inline void *bio_data(struct bio *bio)
  return NULL;
 }
 
+static inline bool bio_full(struct bio *bio)
+{
+ return bio->bi_vcnt >= bio->bi_max_vecs;
+}
+
 /*
  * will die
  */
@@ -447,6 +452,10 @@ void bio_chain(struct bio *, struct bio *);
 extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int);
 extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *,
    unsigned int, unsigned int);
+bool __bio_try_merge_page(struct bio *bio, struct page *page,
+ unsigned int len, unsigned int off);
+void __bio_add_page(struct bio *bio, struct page *page,
+ unsigned int len, unsigned int off);
 int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter);
 struct rq_map_data;
 extern struct bio *bio_map_user_iov(struct request_queue *,
--
2.17.1


--
kernel-team mailing list
[hidden email]
https://lists.ubuntu.com/mailman/listinfo/kernel-team
Reply | Threaded
Open this post in threaded view
|

[PATCH 2/4][SRU][BIONIC] block: bio_iov_iter_get_pages: fix size of last iovec

Colin Ian King-2
In reply to this post by Colin Ian King-2
From: Martin Wilck <[hidden email]>

Buglink: https://bugs.launchpad.net/bugs/1796542

If the last page of the bio is not "full", the length of the last
vector slot needs to be corrected. This slot has the index
(bio->bi_vcnt - 1), but only in bio->bi_io_vec. In the "bv" helper
array, which is shifted by the value of bio->bi_vcnt at function
invocation, the correct index is (nr_pages - 1).

v2: improved readability following suggestions from Ming Lei.
v3: followed a formatting suggestion from Christoph Hellwig.

Fixes: 2cefe4dbaadf ("block: add bio_iov_iter_get_pages()")
Reviewed-by: Hannes Reinecke <[hidden email]>
Reviewed-by: Ming Lei <[hidden email]>
Reviewed-by: Jan Kara <[hidden email]>
Reviewed-by: Christoph Hellwig <[hidden email]>
Signed-off-by: Martin Wilck <[hidden email]>
Signed-off-by: Jens Axboe <[hidden email]>
(cherry picked from commit b403ea2404889e1227812fa9657667a1deb9c694)
Signed-off-by: Colin Ian King <[hidden email]>
---
 block/bio.c | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/block/bio.c b/block/bio.c
index 2636d15af979..d76372a6a5fe 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -911,16 +911,16 @@ EXPORT_SYMBOL(bio_add_page);
  */
 int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 {
- unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt;
+ unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt, idx;
  struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
  struct page **pages = (struct page **)bv;
- size_t offset, diff;
+ size_t offset;
  ssize_t size;
 
  size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset);
  if (unlikely(size <= 0))
  return size ? size : -EFAULT;
- nr_pages = (size + offset + PAGE_SIZE - 1) / PAGE_SIZE;
+ idx = nr_pages = (size + offset + PAGE_SIZE - 1) / PAGE_SIZE;
 
  /*
  * Deep magic below:  We need to walk the pinned pages backwards
@@ -933,17 +933,15 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
  bio->bi_iter.bi_size += size;
  bio->bi_vcnt += nr_pages;
 
- diff = (nr_pages * PAGE_SIZE - offset) - size;
- while (nr_pages--) {
- bv[nr_pages].bv_page = pages[nr_pages];
- bv[nr_pages].bv_len = PAGE_SIZE;
- bv[nr_pages].bv_offset = 0;
+ while (idx--) {
+ bv[idx].bv_page = pages[idx];
+ bv[idx].bv_len = PAGE_SIZE;
+ bv[idx].bv_offset = 0;
  }
 
  bv[0].bv_offset += offset;
  bv[0].bv_len -= offset;
- if (diff)
- bv[bio->bi_vcnt - 1].bv_len -= diff;
+ bv[nr_pages - 1].bv_len -= nr_pages * PAGE_SIZE - offset - size;
 
  iov_iter_advance(iter, size);
  return 0;
--
2.17.1


--
kernel-team mailing list
[hidden email]
https://lists.ubuntu.com/mailman/listinfo/kernel-team
Reply | Threaded
Open this post in threaded view
|

[PATCH 3/4][SRU][BIONIC] blkdev: __blkdev_direct_IO_simple: fix leak in error case

Colin Ian King-2
In reply to this post by Colin Ian King-2
From: Martin Wilck <[hidden email]>

Buglink: https://bugs.launchpad.net/bugs/1796542

Fixes: 72ecad22d9f1 ("block: support a full bio worth of IO for simplified bdev direct-io")
Reviewed-by: Ming Lei <[hidden email]>
Reviewed-by: Hannes Reinecke <[hidden email]>
Reviewed-by: Christoph Hellwig <[hidden email]>
Signed-off-by: Martin Wilck <[hidden email]>
Signed-off-by: Jens Axboe <[hidden email]>
(cherry picked from commit 9362dd1109f87a9d0a798fbc890cb339c171ed35)
Signed-off-by: Colin Ian King <[hidden email]>
---
 fs/block_dev.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 82c823ef06a6..74b4ae9b7ba0 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -219,7 +219,7 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
 
  ret = bio_iov_iter_get_pages(&bio, iter);
  if (unlikely(ret))
- return ret;
+ goto out;
  ret = bio.bi_iter.bi_size;
 
  if (iov_iter_rw(iter) == READ) {
@@ -248,12 +248,13 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
  put_page(bvec->bv_page);
  }
 
- if (vecs != inline_vecs)
- kfree(vecs);
-
  if (unlikely(bio.bi_status))
  ret = blk_status_to_errno(bio.bi_status);
 
+out:
+ if (vecs != inline_vecs)
+ kfree(vecs);
+
  bio_uninit(&bio);
 
  return ret;
--
2.17.1


--
kernel-team mailing list
[hidden email]
https://lists.ubuntu.com/mailman/listinfo/kernel-team
Reply | Threaded
Open this post in threaded view
|

[PATCH 4/4][SRU][BIONIC] block: bio_iov_iter_get_pages: pin more pages for multi-segment IOs

Colin Ian King-2
In reply to this post by Colin Ian King-2
From: Martin Wilck <[hidden email]>

Buglink: https://bugs.launchpad.net/bugs/1796542

bio_iov_iter_get_pages() currently only adds pages for the next non-zero
segment from the iov_iter to the bio. That's suboptimal for callers,
which typically try to pin as many pages as fit into the bio. This patch
converts the current bio_iov_iter_get_pages() into a static helper, and
introduces a new helper that allocates as many pages as

 1) fit into the bio,
 2) are present in the iov_iter,
 3) and can be pinned by MM.

Error is returned only if zero pages could be pinned. Because of 3), a
zero return value doesn't necessarily mean all pages have been pinned.
Callers that have to pin every page in the iov_iter must still call this
function in a loop (this is currently the case).

This change matters most for __blkdev_direct_IO_simple(), which calls
bio_iov_iter_get_pages() only once. If it obtains less pages than
requested, it returns a "short write" or "short read", and
__generic_file_write_iter() falls back to buffered writes, which may
lead to data corruption.

Fixes: 72ecad22d9f1 ("block: support a full bio worth of IO for simplified bdev direct-io")
Reviewed-by: Christoph Hellwig <[hidden email]>
Signed-off-by: Martin Wilck <[hidden email]>
Signed-off-by: Jens Axboe <[hidden email]>
(cherry picked from commit 17d51b10d7773e4618bcac64648f30f12d4078fb)
Signed-off-by: Colin Ian King <[hidden email]>
---
 block/bio.c | 35 ++++++++++++++++++++++++++++++++---
 1 file changed, 32 insertions(+), 3 deletions(-)

diff --git a/block/bio.c b/block/bio.c
index d76372a6a5fe..415c65b9c590 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -902,14 +902,16 @@ int bio_add_page(struct bio *bio, struct page *page,
 EXPORT_SYMBOL(bio_add_page);
 
 /**
- * bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio
+ * __bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio
  * @bio: bio to add pages to
  * @iter: iov iterator describing the region to be mapped
  *
- * Pins as many pages from *iter and appends them to @bio's bvec array. The
+ * Pins pages from *iter and appends them to @bio's bvec array. The
  * pages will have to be released using put_page() when done.
+ * For multi-segment *iter, this function only adds pages from the
+ * the next non-empty segment of the iov iterator.
  */
-int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
+static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 {
  unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt, idx;
  struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
@@ -946,6 +948,33 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
  iov_iter_advance(iter, size);
  return 0;
 }
+
+/**
+ * bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio
+ * @bio: bio to add pages to
+ * @iter: iov iterator describing the region to be mapped
+ *
+ * Pins pages from *iter and appends them to @bio's bvec array. The
+ * pages will have to be released using put_page() when done.
+ * The function tries, but does not guarantee, to pin as many pages as
+ * fit into the bio, or are requested in *iter, whatever is smaller.
+ * If MM encounters an error pinning the requested pages, it stops.
+ * Error is returned only if 0 pages could be pinned.
+ */
+int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
+{
+ unsigned short orig_vcnt = bio->bi_vcnt;
+
+ do {
+ int ret = __bio_iov_iter_get_pages(bio, iter);
+
+ if (unlikely(ret))
+ return bio->bi_vcnt > orig_vcnt ? 0 : ret;
+
+ } while (iov_iter_count(iter) && !bio_full(bio));
+
+ return 0;
+}
 EXPORT_SYMBOL_GPL(bio_iov_iter_get_pages);
 
 static void submit_bio_wait_endio(struct bio *bio)
--
2.17.1


--
kernel-team mailing list
[hidden email]
https://lists.ubuntu.com/mailman/listinfo/kernel-team
Reply | Threaded
Open this post in threaded view
|

ACK: [PATCH 0/4][SRU][BIONIC] block: fix silent corruption in Linux kernel 4.15

Stefan Bader-2
In reply to this post by Colin Ian King-2
On 09.10.2018 18:04, Colin King wrote:

> From: Colin Ian King <[hidden email]>
>
> Buglink: https://bugs.launchpad.net/bugs/1796542
>
> == SRU Justification ==
>
> A silent data corruption was introduced in v4.10-rc1 with commit
> 72ecad22d9f198aafee64218512e02ffa7818671 and was fixed in v4.18-rc7
> with commit 17d51b10d7773e4618bcac64648f30f12d4078fb. It affects
> users of O_DIRECT, in our case a KVM virtual machine with drives
> which use qemu's "cache=none" option.
>
> == Fix ==
>
> Upstream commits:
>
> 0aa69fd32a5f766e997ca8ab4723c5a1146efa8b
>   block: add a lower-level bio_add_page interface
>
> b403ea2404889e1227812fa9657667a1deb9c694
>   block: bio_iov_iter_get_pages: fix size of last iovec
>
> 9362dd1109f87a9d0a798fbc890cb339c171ed35
>   blkdev: __blkdev_direct_IO_simple: fix leak in error case
>
> 17d51b10d7773e4618bcac64648f30f12d4078fb
>   block: bio_iov_iter_get_pages: pin more pages for multi-segment IOs
>
> The first 3 patches are required for a clean application of the final
> patch that actually addresses the problem with a fix to this known
> issue.
>
> == Regression Potential ==
>
> This touches the block layer, so there is risk potential in data
> corruption. The fixes have several weeks in the upstream kernel and
> so far, I see no subsequent fixes required.
>
> == Test Case ==
>
> Build the program listed below [1]
> kudos to Jan Kara, and run with:
>
> dd if=/dev/zero of=loop.img bs=1M count=2048
> sudo losetup /dev/loop0 loop.img
>
> ./blkdev-dio-test /dev/loop0 0 &
> ./blkdev-dio-test /dev/loop0 2048 &
>
> Without the fix, one sees lost writes fairly soon.  With the fix, this
> runs without any lost write messages.
>
> blkdev-dio-test.c:
>
> #define _GNU_SOURCE
> #include <stdio.h>
> #include <unistd.h>
> #include <fcntl.h>
> #include <string.h>
> #include <stdlib.h>
> #include <sys/uio.h>
>
> #define PAGE_SIZE 4096
> #define SECT_SIZE 512
> #define BUF_OFF (2*SECT_SIZE)
>
> int main(int argc, char **argv)
> {
> int fd = open(argv[1], O_RDWR | O_DIRECT);
> int ret;
> char *buf;
> loff_t off;
> struct iovec iov[2];
> unsigned int seq;
>
> if (fd < 0) {
> perror("open");
> return 1;
> }
>
> off = strtol(argv[2], NULL, 10);
>
> buf = aligned_alloc(PAGE_SIZE, PAGE_SIZE);
>
> iov[0].iov_base = buf;
> iov[0].iov_len = SECT_SIZE;
> iov[1].iov_base = buf + BUF_OFF;
> iov[1].iov_len = SECT_SIZE;
>
> seq = 0;
> memset(buf, 0, PAGE_SIZE);
> while (1) {
> *(unsigned int *)buf = seq;
> *(unsigned int *)(buf + BUF_OFF) = seq;
> ret = pwritev(fd, iov, 2, off);
> if (ret < 0) {
> perror("pwritev");
> return 1;
> }
> if (ret != 2*SECT_SIZE) {
> fprintf(stderr, "Short pwritev: %d\n", ret);
> return 1;
> }
> ret = pread(fd, buf, PAGE_SIZE, off);
> if (ret < 0) {
> perror("pread");
> return 1;
> }
> if (ret != PAGE_SIZE) {
> fprintf(stderr, "Short read: %d\n", ret);
> return 1;
> }
> if (*(unsigned int *)buf != seq ||
>    *(unsigned int *)(buf + SECT_SIZE) != seq) {
> printf("Lost write %u: %u %u\n", seq, *(unsigned int *)buf, *(unsigned int *)(buf + SECT_SIZE));
> return 1;
> }
> seq++;
> }
>
> return 0;
> }
>
> References:
> [1] https://www.spinics.net/lists/linux-block/msg28507.html
>
> ---
>
> Christoph Hellwig (1):
>   block: add a lower-level bio_add_page interface
>
> Martin Wilck (3):
>   block: bio_iov_iter_get_pages: fix size of last iovec
>   blkdev: __blkdev_direct_IO_simple: fix leak in error case
>   block: bio_iov_iter_get_pages: pin more pages for multi-segment IOs
>
>  block/bio.c         | 149 ++++++++++++++++++++++++++++++--------------
>  fs/block_dev.c      |   9 +--
>  include/linux/bio.h |   9 +++
>  3 files changed, 117 insertions(+), 50 deletions(-)
>
Successful testing and so far changes had no further follow-up changes.

Acked-by: Stefan Bader <[hidden email]>


--
kernel-team mailing list
[hidden email]
https://lists.ubuntu.com/mailman/listinfo/kernel-team

signature.asc (836 bytes) Download Attachment
Reply | Threaded
Open this post in threaded view
|

ACK: [PATCH 0/4][SRU][BIONIC] block: fix silent corruption in Linux kernel 4.15

Kleber Souza
In reply to this post by Colin Ian King-2
On 10/09/18 18:04, Colin King wrote:

> From: Colin Ian King <[hidden email]>
>
> Buglink: https://bugs.launchpad.net/bugs/1796542
>
> == SRU Justification ==
>
> A silent data corruption was introduced in v4.10-rc1 with commit
> 72ecad22d9f198aafee64218512e02ffa7818671 and was fixed in v4.18-rc7
> with commit 17d51b10d7773e4618bcac64648f30f12d4078fb. It affects
> users of O_DIRECT, in our case a KVM virtual machine with drives
> which use qemu's "cache=none" option.
>
> == Fix ==
>
> Upstream commits:
>
> 0aa69fd32a5f766e997ca8ab4723c5a1146efa8b
>   block: add a lower-level bio_add_page interface
>
> b403ea2404889e1227812fa9657667a1deb9c694
>   block: bio_iov_iter_get_pages: fix size of last iovec
>
> 9362dd1109f87a9d0a798fbc890cb339c171ed35
>   blkdev: __blkdev_direct_IO_simple: fix leak in error case
>
> 17d51b10d7773e4618bcac64648f30f12d4078fb
>   block: bio_iov_iter_get_pages: pin more pages for multi-segment IOs
>
> The first 3 patches are required for a clean application of the final
> patch that actually addresses the problem with a fix to this known
> issue.
>
> == Regression Potential ==
>
> This touches the block layer, so there is risk potential in data
> corruption. The fixes have several weeks in the upstream kernel and
> so far, I see no subsequent fixes required.
>
> == Test Case ==
>
> Build the program listed below [1]
> kudos to Jan Kara, and run with:
>
> dd if=/dev/zero of=loop.img bs=1M count=2048
> sudo losetup /dev/loop0 loop.img
>
> ./blkdev-dio-test /dev/loop0 0 &
> ./blkdev-dio-test /dev/loop0 2048 &
>
> Without the fix, one sees lost writes fairly soon.  With the fix, this
> runs without any lost write messages.
>
> blkdev-dio-test.c:
>
> #define _GNU_SOURCE
> #include <stdio.h>
> #include <unistd.h>
> #include <fcntl.h>
> #include <string.h>
> #include <stdlib.h>
> #include <sys/uio.h>
>
> #define PAGE_SIZE 4096
> #define SECT_SIZE 512
> #define BUF_OFF (2*SECT_SIZE)
>
> int main(int argc, char **argv)
> {
> int fd = open(argv[1], O_RDWR | O_DIRECT);
> int ret;
> char *buf;
> loff_t off;
> struct iovec iov[2];
> unsigned int seq;
>
> if (fd < 0) {
> perror("open");
> return 1;
> }
>
> off = strtol(argv[2], NULL, 10);
>
> buf = aligned_alloc(PAGE_SIZE, PAGE_SIZE);
>
> iov[0].iov_base = buf;
> iov[0].iov_len = SECT_SIZE;
> iov[1].iov_base = buf + BUF_OFF;
> iov[1].iov_len = SECT_SIZE;
>
> seq = 0;
> memset(buf, 0, PAGE_SIZE);
> while (1) {
> *(unsigned int *)buf = seq;
> *(unsigned int *)(buf + BUF_OFF) = seq;
> ret = pwritev(fd, iov, 2, off);
> if (ret < 0) {
> perror("pwritev");
> return 1;
> }
> if (ret != 2*SECT_SIZE) {
> fprintf(stderr, "Short pwritev: %d\n", ret);
> return 1;
> }
> ret = pread(fd, buf, PAGE_SIZE, off);
> if (ret < 0) {
> perror("pread");
> return 1;
> }
> if (ret != PAGE_SIZE) {
> fprintf(stderr, "Short read: %d\n", ret);
> return 1;
> }
> if (*(unsigned int *)buf != seq ||
>    *(unsigned int *)(buf + SECT_SIZE) != seq) {
> printf("Lost write %u: %u %u\n", seq, *(unsigned int *)buf, *(unsigned int *)(buf + SECT_SIZE));
> return 1;
> }
> seq++;
> }
>
> return 0;
> }
>
> References:
> [1] https://www.spinics.net/lists/linux-block/msg28507.html
>
> ---
>
> Christoph Hellwig (1):
>   block: add a lower-level bio_add_page interface
>
> Martin Wilck (3):
>   block: bio_iov_iter_get_pages: fix size of last iovec
>   blkdev: __blkdev_direct_IO_simple: fix leak in error case
>   block: bio_iov_iter_get_pages: pin more pages for multi-segment IOs
>
>  block/bio.c         | 149 ++++++++++++++++++++++++++++++--------------
>  fs/block_dev.c      |   9 +--
>  include/linux/bio.h |   9 +++
>  3 files changed, 117 insertions(+), 50 deletions(-)
>

Acked-by: Kleber Sacilotto de Souza <[hidden email]>

--
kernel-team mailing list
[hidden email]
https://lists.ubuntu.com/mailman/listinfo/kernel-team
Reply | Threaded
Open this post in threaded view
|

APPLIED: [PATCH 0/4][SRU][BIONIC] block: fix silent corruption in Linux kernel 4.15

Stefan Bader-2
In reply to this post by Colin Ian King-2
On 09.10.2018 18:04, Colin King wrote:

> From: Colin Ian King <[hidden email]>
>
> Buglink: https://bugs.launchpad.net/bugs/1796542
>
> == SRU Justification ==
>
> A silent data corruption was introduced in v4.10-rc1 with commit
> 72ecad22d9f198aafee64218512e02ffa7818671 and was fixed in v4.18-rc7
> with commit 17d51b10d7773e4618bcac64648f30f12d4078fb. It affects
> users of O_DIRECT, in our case a KVM virtual machine with drives
> which use qemu's "cache=none" option.
>
> == Fix ==
>
> Upstream commits:
>
> 0aa69fd32a5f766e997ca8ab4723c5a1146efa8b
>   block: add a lower-level bio_add_page interface
>
> b403ea2404889e1227812fa9657667a1deb9c694
>   block: bio_iov_iter_get_pages: fix size of last iovec
>
> 9362dd1109f87a9d0a798fbc890cb339c171ed35
>   blkdev: __blkdev_direct_IO_simple: fix leak in error case
>
> 17d51b10d7773e4618bcac64648f30f12d4078fb
>   block: bio_iov_iter_get_pages: pin more pages for multi-segment IOs
>
> The first 3 patches are required for a clean application of the final
> patch that actually addresses the problem with a fix to this known
> issue.
>
> == Regression Potential ==
>
> This touches the block layer, so there is risk potential in data
> corruption. The fixes have several weeks in the upstream kernel and
> so far, I see no subsequent fixes required.
>
> == Test Case ==
>
> Build the program listed below [1]
> kudos to Jan Kara, and run with:
>
> dd if=/dev/zero of=loop.img bs=1M count=2048
> sudo losetup /dev/loop0 loop.img
>
> ./blkdev-dio-test /dev/loop0 0 &
> ./blkdev-dio-test /dev/loop0 2048 &
>
> Without the fix, one sees lost writes fairly soon.  With the fix, this
> runs without any lost write messages.
>
> blkdev-dio-test.c:
>
> #define _GNU_SOURCE
> #include <stdio.h>
> #include <unistd.h>
> #include <fcntl.h>
> #include <string.h>
> #include <stdlib.h>
> #include <sys/uio.h>
>
> #define PAGE_SIZE 4096
> #define SECT_SIZE 512
> #define BUF_OFF (2*SECT_SIZE)
>
> int main(int argc, char **argv)
> {
> int fd = open(argv[1], O_RDWR | O_DIRECT);
> int ret;
> char *buf;
> loff_t off;
> struct iovec iov[2];
> unsigned int seq;
>
> if (fd < 0) {
> perror("open");
> return 1;
> }
>
> off = strtol(argv[2], NULL, 10);
>
> buf = aligned_alloc(PAGE_SIZE, PAGE_SIZE);
>
> iov[0].iov_base = buf;
> iov[0].iov_len = SECT_SIZE;
> iov[1].iov_base = buf + BUF_OFF;
> iov[1].iov_len = SECT_SIZE;
>
> seq = 0;
> memset(buf, 0, PAGE_SIZE);
> while (1) {
> *(unsigned int *)buf = seq;
> *(unsigned int *)(buf + BUF_OFF) = seq;
> ret = pwritev(fd, iov, 2, off);
> if (ret < 0) {
> perror("pwritev");
> return 1;
> }
> if (ret != 2*SECT_SIZE) {
> fprintf(stderr, "Short pwritev: %d\n", ret);
> return 1;
> }
> ret = pread(fd, buf, PAGE_SIZE, off);
> if (ret < 0) {
> perror("pread");
> return 1;
> }
> if (ret != PAGE_SIZE) {
> fprintf(stderr, "Short read: %d\n", ret);
> return 1;
> }
> if (*(unsigned int *)buf != seq ||
>    *(unsigned int *)(buf + SECT_SIZE) != seq) {
> printf("Lost write %u: %u %u\n", seq, *(unsigned int *)buf, *(unsigned int *)(buf + SECT_SIZE));
> return 1;
> }
> seq++;
> }
>
> return 0;
> }
>
> References:
> [1] https://www.spinics.net/lists/linux-block/msg28507.html
>
> ---
>
> Christoph Hellwig (1):
>   block: add a lower-level bio_add_page interface
>
> Martin Wilck (3):
>   block: bio_iov_iter_get_pages: fix size of last iovec
>   blkdev: __blkdev_direct_IO_simple: fix leak in error case
>   block: bio_iov_iter_get_pages: pin more pages for multi-segment IOs
>
>  block/bio.c         | 149 ++++++++++++++++++++++++++++++--------------
>  fs/block_dev.c      |   9 +--
>  include/linux/bio.h |   9 +++
>  3 files changed, 117 insertions(+), 50 deletions(-)
>
Applied to bionic/master-next for re-spin. Thanks.

-Stefan


--
kernel-team mailing list
[hidden email]
https://lists.ubuntu.com/mailman/listinfo/kernel-team

signature.asc (836 bytes) Download Attachment