[PATCH 0/2] [xenial] CVE-2019-19768


benjamin.romer
In the Linux kernel 5.4.0-rc2, there is a use-after-free (read) in the
__blk_add_trace function in kernel/trace/blktrace.c (which is used to fill
out a blk_io_trace structure and place it in a per-cpu sub-buffer).
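
In outline, the racy pattern looks like this (an editorial sketch of the
problem, not the verbatim kernel code; the helper names trace_hot_path()
and trace_teardown() are illustrative only, while the calls mirror those
in the patches below):

```
/* reader: a tracing hot path */
static void trace_hot_path(struct request_queue *q)
{
	struct blk_trace *bt = q->blk_trace;	/* plain, unprotected load */

	if (bt)
		__blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL);
		/* use-after-free if teardown freed bt after the load */
}

/* writer: concurrent teardown, e.g. via blk_trace_remove() */
static void trace_teardown(struct request_queue *q)
{
	struct blk_trace *bt = q->blk_trace;

	q->blk_trace = NULL;
	blk_trace_free(bt);	/* frees bt while readers may still use it */
}
```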

Cengiz Can (1):
  blktrace: fix dereference after null check

Jan Kara (1):
  blktrace: Protect q->blk_trace with RCU

 include/linux/blkdev.h       |   2 +-
 include/linux/blktrace_api.h |   6 +-
 kernel/trace/blktrace.c      | 110 ++++++++++++++++++++++++++---------
 3 files changed, 89 insertions(+), 29 deletions(-)

--
2.20.1



[PATCH 1/2] blktrace: Protect q->blk_trace with RCU

benjamin.romer
From: Jan Kara <[hidden email]>

KASAN is reporting that __blk_add_trace() has a use-after-free issue
when accessing q->blk_trace. Indeed the switching of block tracing (and
thus eventual freeing of q->blk_trace) is completely unsynchronized with
the currently running tracing and thus it can happen that the blk_trace
structure is being freed just while __blk_add_trace() works on it.
Protect accesses to q->blk_trace by RCU during tracing and make sure we
wait for the end of the RCU grace period when shutting down tracing.
Luckily that is a rare enough event that we can afford it. Note that
postponing the freeing of blk_trace to an RCU callback is better
avoided, as it could have unexpected user-visible side effects: the
debugfs files would still exist for a short while after block tracing
has been shut down.
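
In outline, the scheme applied here is the classic RCU reader/writer
split (an editorial sketch, with bt and q as in the diff below; the full
diff follows):

```
/* readers: every tracing hot path */
rcu_read_lock();
bt = rcu_dereference(q->blk_trace);
if (bt)
	__blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL);
rcu_read_unlock();	/* bt is guaranteed live up to this point */

/* writer: teardown waits out all readers before freeing */
synchronize_rcu();	/* every outstanding read-side section has ended */
blk_trace_free(bt);
```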

Link: https://bugzilla.kernel.org/show_bug.cgi?id=205711
CC: [hidden email]
Reviewed-by: Chaitanya Kulkarni <[hidden email]>
Reviewed-by: Ming Lei <[hidden email]>
Tested-by: Ming Lei <[hidden email]>
Reviewed-by: Bart Van Assche <[hidden email]>
Reported-by: Tristan Madani <[hidden email]>
Signed-off-by: Jan Kara <[hidden email]>
Signed-off-by: Jens Axboe <[hidden email]>

CVE-2019-19768
(cherry picked from commit c780e86dd48ef6467a1146cf7d0fe1e05a635039)
[ ben_r: adjusted patch to apply to ubuntu ]
Signed-off-by: Benjamin M Romer <[hidden email]>
---
 include/linux/blkdev.h       |   2 +-
 include/linux/blktrace_api.h |   6 +-
 kernel/trace/blktrace.c      | 107 ++++++++++++++++++++++++++---------
 3 files changed, 86 insertions(+), 29 deletions(-)

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 60eb4e90a757..98dd8266f6e3 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -432,7 +432,7 @@ struct request_queue {
  unsigned int sg_reserved_size;
  int node;
 #ifdef CONFIG_BLK_DEV_IO_TRACE
- struct blk_trace *blk_trace;
+ struct blk_trace __rcu *blk_trace;
 #endif
  /*
  * for flush operations
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index afc1343df3c7..e644bfe50019 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -51,9 +51,13 @@ void __trace_note_message(struct blk_trace *, const char *fmt, ...);
  **/
 #define blk_add_trace_msg(q, fmt, ...) \
  do { \
- struct blk_trace *bt = (q)->blk_trace; \
+ struct blk_trace *bt; \
+ \
+ rcu_read_lock(); \
+ bt = rcu_dereference((q)->blk_trace); \
  if (unlikely(bt)) \
  __trace_note_message(bt, fmt, ##__VA_ARGS__); \
+ rcu_read_unlock(); \
  } while (0)
 #define BLK_TN_MAX_MSG 128
 
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 210b8e726a97..a222697fd108 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -319,6 +319,7 @@ static void put_probe_ref(void)
 
 static void blk_trace_cleanup(struct blk_trace *bt)
 {
+ synchronize_rcu();
  blk_trace_free(bt);
  put_probe_ref();
 }
@@ -606,8 +607,10 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name,
 int blk_trace_startstop(struct request_queue *q, int start)
 {
  int ret;
- struct blk_trace *bt = q->blk_trace;
+ struct blk_trace *bt;
 
+ bt = rcu_dereference_protected(q->blk_trace,
+       lockdep_is_held(&q->blk_trace_mutex));
  if (bt == NULL)
  return -EINVAL;
 
@@ -698,7 +701,8 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
  **/
 void blk_trace_shutdown(struct request_queue *q)
 {
- if (q->blk_trace) {
+ if (rcu_dereference_protected(q->blk_trace,
+      lockdep_is_held(&q->blk_trace_mutex))) {
  blk_trace_startstop(q, 0);
  blk_trace_remove(q);
  }
@@ -722,10 +726,14 @@ void blk_trace_shutdown(struct request_queue *q)
 static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
      unsigned int nr_bytes, u32 what)
 {
- struct blk_trace *bt = q->blk_trace;
+ struct blk_trace *bt;
 
- if (likely(!bt))
+ rcu_read_lock();
+ bt = rcu_dereference(rq->q->blk_trace);
+ if (likely(!bt)) {
+ rcu_read_unlock();
  return;
+ }
 
  if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
  what |= BLK_TC_ACT(BLK_TC_PC);
@@ -736,6 +744,7 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
  __blk_add_trace(bt, blk_rq_pos(rq), nr_bytes,
  rq->cmd_flags, what, rq->errors, 0, NULL);
  }
+ rcu_read_unlock();
 }
 
 static void blk_add_trace_rq_abort(void *ignore,
@@ -785,13 +794,19 @@ static void blk_add_trace_rq_complete(void *ignore,
 static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
       u32 what, int error)
 {
- struct blk_trace *bt = q->blk_trace;
+ struct blk_trace *bt;
 
  if (likely(!bt))
+ rcu_read_lock();
+ bt = rcu_dereference(q->blk_trace);
+ if (likely(!bt)) {
+ rcu_read_unlock();
  return;
+ }
 
  __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
  bio->bi_rw, what, error, 0, NULL);
+ rcu_read_unlock();
 }
 
 static void blk_add_trace_bio_bounce(void *ignore,
@@ -836,10 +851,13 @@ static void blk_add_trace_getrq(void *ignore,
  if (bio)
  blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0);
  else {
- struct blk_trace *bt = q->blk_trace;
+ struct blk_trace *bt;
 
+ rcu_read_lock();
+ bt = rcu_dereference(q->blk_trace);
  if (bt)
  __blk_add_trace(bt, 0, 0, rw, BLK_TA_GETRQ, 0, 0, NULL);
+ rcu_read_unlock();
  }
 }
 
@@ -851,27 +869,35 @@ static void blk_add_trace_sleeprq(void *ignore,
  if (bio)
  blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0);
  else {
- struct blk_trace *bt = q->blk_trace;
+ struct blk_trace *bt;
 
+ rcu_read_lock();
+ bt = rcu_dereference(q->blk_trace);
  if (bt)
  __blk_add_trace(bt, 0, 0, rw, BLK_TA_SLEEPRQ,
  0, 0, NULL);
+ rcu_read_unlock();
  }
 }
 
 static void blk_add_trace_plug(void *ignore, struct request_queue *q)
 {
- struct blk_trace *bt = q->blk_trace;
+ struct blk_trace *bt;
 
+ rcu_read_lock();
+ bt = rcu_dereference(q->blk_trace);
  if (bt)
  __blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL);
+ rcu_read_unlock();
 }
 
 static void blk_add_trace_unplug(void *ignore, struct request_queue *q,
     unsigned int depth, bool explicit)
 {
- struct blk_trace *bt = q->blk_trace;
+ struct blk_trace *bt;
 
+ rcu_read_lock();
+ bt = rcu_dereference(q->blk_trace);
  if (bt) {
  __be64 rpdu = cpu_to_be64(depth);
  u32 what;
@@ -883,14 +909,17 @@ static void blk_add_trace_unplug(void *ignore, struct request_queue *q,
 
  __blk_add_trace(bt, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu);
  }
+ rcu_read_unlock();
 }
 
 static void blk_add_trace_split(void *ignore,
  struct request_queue *q, struct bio *bio,
  unsigned int pdu)
 {
- struct blk_trace *bt = q->blk_trace;
+ struct blk_trace *bt;
 
+ rcu_read_lock();
+ bt = rcu_dereference(q->blk_trace);
  if (bt) {
  __be64 rpdu = cpu_to_be64(pdu);
 
@@ -898,6 +927,7 @@ static void blk_add_trace_split(void *ignore,
  bio->bi_iter.bi_size, bio->bi_rw, BLK_TA_SPLIT,
  bio->bi_error, sizeof(rpdu), &rpdu);
  }
+ rcu_read_unlock();
 }
 
 /**
@@ -917,11 +947,15 @@ static void blk_add_trace_bio_remap(void *ignore,
     struct request_queue *q, struct bio *bio,
     dev_t dev, sector_t from)
 {
- struct blk_trace *bt = q->blk_trace;
+ struct blk_trace *bt;
  struct blk_io_trace_remap r;
 
- if (likely(!bt))
+ rcu_read_lock();
+ bt = rcu_dereference(q->blk_trace);
+ if (likely(!bt)) {
+ rcu_read_unlock();
  return;
+ }
 
  r.device_from = cpu_to_be32(dev);
  r.device_to   = cpu_to_be32(bio->bi_bdev->bd_dev);
@@ -930,6 +964,7 @@ static void blk_add_trace_bio_remap(void *ignore,
  __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
  bio->bi_rw, BLK_TA_REMAP, bio->bi_error,
  sizeof(r), &r);
+ rcu_read_unlock();
 }
 
 /**
@@ -950,11 +985,16 @@ static void blk_add_trace_rq_remap(void *ignore,
    struct request *rq, dev_t dev,
    sector_t from)
 {
- struct blk_trace *bt = q->blk_trace;
+ struct blk_trace *bt;
  struct blk_io_trace_remap r;
 
  if (likely(!bt))
+ rcu_read_lock();
+ bt = rcu_dereference(q->blk_trace);
+ if (likely(!bt)) {
+ rcu_read_unlock();
  return;
+ }
 
  r.device_from = cpu_to_be32(dev);
  r.device_to   = cpu_to_be32(disk_devt(rq->rq_disk));
@@ -963,6 +1003,7 @@ static void blk_add_trace_rq_remap(void *ignore,
  __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq),
  rq_data_dir(rq), BLK_TA_REMAP, !!rq->errors,
  sizeof(r), &r);
+ rcu_read_unlock();
 }
 
 /**
@@ -980,10 +1021,14 @@ void blk_add_driver_data(struct request_queue *q,
  struct request *rq,
  void *data, size_t len)
 {
- struct blk_trace *bt = q->blk_trace;
+ struct blk_trace *bt;
 
- if (likely(!bt))
+ rcu_read_lock();
+ bt = rcu_dereference(q->blk_trace);
+ if (likely(!bt)) {
+ rcu_read_unlock();
  return;
+ }
 
  if (rq->cmd_type == REQ_TYPE_BLOCK_PC)
  __blk_add_trace(bt, 0, blk_rq_bytes(rq), 0,
@@ -991,6 +1036,7 @@ void blk_add_driver_data(struct request_queue *q,
  else
  __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), 0,
  BLK_TA_DRV_DATA, rq->errors, len, data);
+ rcu_read_unlock();
 }
 EXPORT_SYMBOL_GPL(blk_add_driver_data);
 
@@ -1482,6 +1528,7 @@ static int blk_trace_remove_queue(struct request_queue *q)
  return -EINVAL;
 
  put_probe_ref();
+ synchronize_rcu();
  blk_trace_free(bt);
  return 0;
 }
@@ -1642,6 +1689,7 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
  struct hd_struct *p = dev_to_part(dev);
  struct request_queue *q;
  struct block_device *bdev;
+ struct blk_trace *bt;
  ssize_t ret = -ENXIO;
 
  bdev = bdget(part_devt(p));
@@ -1654,21 +1702,23 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
 
  mutex_lock(&bdev->bd_mutex);
 
+ bt = rcu_dereference_protected(q->blk_trace,
+       lockdep_is_held(&q->blk_trace_mutex));
  if (attr == &dev_attr_enable) {
- ret = sprintf(buf, "%u\n", !!q->blk_trace);
+ ret = sprintf(buf, "%u\n", !!bt);
  goto out_unlock_bdev;
  }
 
- if (q->blk_trace == NULL)
+ if (bt == NULL)
  ret = sprintf(buf, "disabled\n");
  else if (attr == &dev_attr_act_mask)
- ret = blk_trace_mask2str(buf, q->blk_trace->act_mask);
+ ret = blk_trace_mask2str(buf, bt->act_mask);
  else if (attr == &dev_attr_pid)
- ret = sprintf(buf, "%u\n", q->blk_trace->pid);
+ ret = sprintf(buf, "%u\n", bt->pid);
  else if (attr == &dev_attr_start_lba)
- ret = sprintf(buf, "%llu\n", q->blk_trace->start_lba);
+ ret = sprintf(buf, "%llu\n", bt->start_lba);
  else if (attr == &dev_attr_end_lba)
- ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba);
+ ret = sprintf(buf, "%llu\n", bt->end_lba);
 
 out_unlock_bdev:
  mutex_unlock(&bdev->bd_mutex);
@@ -1685,6 +1735,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
  struct block_device *bdev;
  struct request_queue *q;
  struct hd_struct *p;
+ struct blk_trace *bt;
  u64 value;
  ssize_t ret = -EINVAL;
 
@@ -1714,9 +1765,11 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
  goto out_bdput;
 
  mutex_lock(&bdev->bd_mutex);
+ bt = rcu_dereference_protected(q->blk_trace,
+       lockdep_is_held(&q->bd_mutex));
 
  if (attr == &dev_attr_enable) {
- if (!!value == !!q->blk_trace) {
+ if (!!value == !!bt) {
  ret = 0;
  goto out_unlock_bdev;
  }
@@ -1728,18 +1781,18 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
  }
 
  ret = 0;
- if (q->blk_trace == NULL)
+ if (bt == NULL)
  ret = blk_trace_setup_queue(q, bdev);
 
  if (ret == 0) {
  if (attr == &dev_attr_act_mask)
- q->blk_trace->act_mask = value;
+ bt->act_mask = value;
  else if (attr == &dev_attr_pid)
- q->blk_trace->pid = value;
+ bt->pid = value;
  else if (attr == &dev_attr_start_lba)
- q->blk_trace->start_lba = value;
+ bt->start_lba = value;
  else if (attr == &dev_attr_end_lba)
- q->blk_trace->end_lba = value;
+ bt->end_lba = value;
  }
 
 out_unlock_bdev:
--
2.20.1



[PATCH 2/2] blktrace: fix dereference after null check

benjamin.romer
In reply to this post by benjamin.romer
From: Cengiz Can <[hidden email]>

There was a recent change in blktrace.c that added RCU protection to
`q->blk_trace` in order to fix a use-after-free issue during access.

However, the change missed an edge case that can lead to dereferencing
the `bt` pointer even when it is NULL:

Coverity static analyzer marked this as a FORWARD_NULL issue with CID
1460458.

```
/kernel/trace/blktrace.c: 1904 in sysfs_blk_trace_attr_store()
1898            ret = 0;
1899            if (bt == NULL)
1900                    ret = blk_trace_setup_queue(q, bdev);
1901
1902            if (ret == 0) {
1903                    if (attr == &dev_attr_act_mask)
>>>     CID 1460458:  Null pointer dereferences  (FORWARD_NULL)
>>>     Dereferencing null pointer "bt".
1904                            bt->act_mask = value;
1905                    else if (attr == &dev_attr_pid)
1906                            bt->pid = value;
1907                    else if (attr == &dev_attr_start_lba)
1908                            bt->start_lba = value;
1909                    else if (attr == &dev_attr_end_lba)
```

Added a reassignment with RCU annotation to fix the issue.
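
In other words (an editorial sketch condensed from the diff below): the
local `bt` is loaded before blk_trace_setup_queue() allocates a new
trace, so it must be re-read afterwards:

```
bt = rcu_dereference_protected(q->blk_trace,
			       lockdep_is_held(&q->blk_trace_mutex));
/* bt is NULL here when tracing has not been set up yet */
if (bt == NULL) {
	ret = blk_trace_setup_queue(q, bdev);	/* allocates q->blk_trace */
	bt = rcu_dereference_protected(q->blk_trace,	/* re-read it */
				       lockdep_is_held(&q->blk_trace_mutex));
}

if (ret == 0)
	bt->act_mask = value;	/* bt is now non-NULL on this path */
```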

Fixes: c780e86dd48 ("blktrace: Protect q->blk_trace with RCU")
Cc: [hidden email]
Reviewed-by: Ming Lei <[hidden email]>
Reviewed-by: Bob Liu <[hidden email]>
Reviewed-by: Steven Rostedt (VMware) <[hidden email]>
Signed-off-by: Cengiz Can <[hidden email]>
Signed-off-by: Jens Axboe <[hidden email]>

CVE-2019-19768
(cherry picked from commit 153031a301bb07194e9c37466cfce8eacb977621)
Signed-off-by: Benjamin M Romer <[hidden email]>
---
 kernel/trace/blktrace.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index a222697fd108..bd9ecb64479a 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -1781,8 +1781,11 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
  }
 
  ret = 0;
- if (bt == NULL)
+ if (bt == NULL) {
  ret = blk_trace_setup_queue(q, bdev);
+ bt = rcu_dereference_protected(q->blk_trace,
+ lockdep_is_held(&q->blk_trace_mutex));
+ }
 
  if (ret == 0) {
  if (attr == &dev_attr_act_mask)
--
2.20.1



[PATCH 0/2] [xenial] CVE-2019-19768

benjamin.romer
In reply to this post by benjamin.romer
In the Linux kernel 5.4.0-rc2, there is a use-after-free (read) in the
__blk_add_trace function in kernel/trace/blktrace.c (which is used to fill
out a blk_io_trace structure and place it in a per-cpu sub-buffer).

Cengiz Can (1):
  blktrace: fix dereference after null check

Jan Kara (1):
  blktrace: Protect q->blk_trace with RCU

 include/linux/blkdev.h       |   2 +-
 include/linux/blktrace_api.h |   6 +-
 kernel/trace/blktrace.c      | 118 ++++++++++++++++++++++++++---------
 3 files changed, 93 insertions(+), 33 deletions(-)

--
2.20.1



NAK: [PATCH 1/2] blktrace: Protect q->blk_trace with RCU

Andrea Righi
In reply to this post by benjamin.romer
On Tue, Mar 31, 2020 at 08:44:43AM -0400, Benjamin M Romer wrote:

> From: Jan Kara <[hidden email]>
>
> KASAN is reporting that __blk_add_trace() has a use-after-free issue
> when accessing q->blk_trace. Indeed the switching of block tracing (and
> thus eventual freeing of q->blk_trace) is completely unsynchronized with
> the currently running tracing and thus it can happen that the blk_trace
> structure is being freed just while __blk_add_trace() works on it.
> Protect accesses to q->blk_trace by RCU during tracing and make sure we
> wait for the end of the RCU grace period when shutting down tracing.
> Luckily that is a rare enough event that we can afford it. Note that
> postponing the freeing of blk_trace to an RCU callback is better
> avoided, as it could have unexpected user-visible side effects: the
> debugfs files would still exist for a short while after block tracing
> has been shut down.
>
> Link: https://bugzilla.kernel.org/show_bug.cgi?id=205711
> CC: [hidden email]
> Reviewed-by: Chaitanya Kulkarni <[hidden email]>
> Reviewed-by: Ming Lei <[hidden email]>
> Tested-by: Ming Lei <[hidden email]>
> Reviewed-by: Bart Van Assche <[hidden email]>
> Reported-by: Tristan Madani <[hidden email]>
> Signed-off-by: Jan Kara <[hidden email]>
> Signed-off-by: Jens Axboe <[hidden email]>
>
> CVE-2019-19768
> (cherry picked from commit c780e86dd48ef6467a1146cf7d0fe1e05a635039)
> [ ben_r: adjusted patch to apply to ubuntu ]

Maybe we should use "backported from commit <sha1>"?

>  static void blk_add_trace_rq_abort(void *ignore,
> @@ -785,13 +794,19 @@ static void blk_add_trace_rq_complete(void *ignore,
>  static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
>        u32 what, int error)
>  {
> - struct blk_trace *bt = q->blk_trace;
> + struct blk_trace *bt;
>  
>   if (likely(!bt))

I think there's an error here. This "if" line should be removed,
otherwise we're just checking uninitialized data on the stack.

> + rcu_read_lock();
> + bt = rcu_dereference(q->blk_trace);
> + if (likely(!bt)) {
> + rcu_read_unlock();
>   return;
> + }
>  
>   __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
>   bio->bi_rw, what, error, 0, NULL);
> + rcu_read_unlock();
>  }

Thanks,
-Andrea


Re: NAK: [PATCH 1/2] blktrace: Protect q->blk_trace with RCU

benjamin.romer
On Tue, 2020-03-31 at 15:21 +0200, Andrea Righi wrote:
>
> I think there's an error here. This "if" line should be removed,
> otherwise we're just checking random data in the stack.
>

Thanks for catching this!! Upon double-checking the patch I found a
second merge error just like that one. I'll fix, retest and then
resubmit. :)
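
For reference, the corrected shape of blk_add_trace_bio() in this
backport (the stray pre-RCU NULL check dropped, matching upstream commit
c780e86dd48e); blk_add_trace_rq_remap() needs the same treatment:

```
static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
			      u32 what, int error)
{
	struct blk_trace *bt;

	rcu_read_lock();
	bt = rcu_dereference(q->blk_trace);
	if (likely(!bt)) {
		rcu_read_unlock();
		return;
	}

	__blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
			bio->bi_rw, what, error, 0, NULL);
	rcu_read_unlock();
}
```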

-- Ben


