Illumos 5630 - stale bonus buffer in recycled dnode_t leads to data corruption

5630 stale bonus buffer in recycled dnode_t leads to data corruption
Author: Justin T. Gibbs <justing@spectralogic.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george@delphix.com>
Reviewed by: Will Andrews <will@freebsd.org>
Approved by: Robert Mustacchi <rm@joyent.com>

References:
  https://www.illumos.org/issues/5630
  https://github.com/illumos/illumos-gate/commit/cd485b4

Ported-by: Chris Dunlop <chris@onthe.net.au>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Richard Yao <ryao@gentoo.org>
Issue #3172
This commit is contained in:
Justin T. Gibbs 2015-03-12 11:10:35 +11:00 committed by Brian Behlendorf
parent 73ad4a9f3c
commit 4c7b7eedcd
4 changed files with 65 additions and 12 deletions

View File

@ -290,6 +290,7 @@ int dnode_hold_impl(struct objset *dd, uint64_t object, int flag,
void *ref, dnode_t **dnp);
boolean_t dnode_add_ref(dnode_t *dn, void *ref);
void dnode_rele(dnode_t *dn, void *ref);
void dnode_rele_and_unlock(dnode_t *dn, void *tag);
void dnode_setdirty(dnode_t *dn, dmu_tx_t *tx);
void dnode_sync(dnode_t *dn, dmu_tx_t *tx);
void dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
@ -311,6 +312,7 @@ void dnode_fini(void);
int dnode_next_offset(dnode_t *dn, int flags, uint64_t *off,
int minlvl, uint64_t blkfill, uint64_t txg);
void dnode_evict_dbufs(dnode_t *dn);
void dnode_evict_bonus(dnode_t *dn);
#ifdef ZFS_DEBUG

View File

@ -2200,21 +2200,60 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
if (holds == 0) {
if (db->db_blkid == DMU_BONUS_BLKID) {
dnode_t *dn;
/*
* If the dnode moves here, we cannot cross this
* barrier until the move completes.
*/
DB_DNODE_ENTER(db);
dn = DB_DNODE(db);
atomic_dec_32(&dn->dn_dbufs_count);
/*
* Decrementing the dbuf count means that the bonus
* buffer's dnode hold is no longer discounted in
* dnode_move(). The dnode cannot move until after
* the dnode_rele_and_unlock() below.
*/
DB_DNODE_EXIT(db);
/*
* Do not reference db after its lock is dropped.
* Another thread may evict it.
*/
mutex_exit(&db->db_mtx);
/*
* If the dnode moves here, we cannot cross this barrier
* until the move completes.
* If the dnode has been freed, evict the bonus
* buffer immediately. The data in the bonus
* buffer is no longer relevant and this prevents
* a stale bonus buffer from being associated
* with this dnode_t should the dnode_t be reused
* prior to being destroyed.
*/
DB_DNODE_ENTER(db);
atomic_dec_32(&DB_DNODE(db)->dn_dbufs_count);
DB_DNODE_EXIT(db);
/*
* The bonus buffer's dnode hold is no longer discounted
* in dnode_move(). The dnode cannot move until after
* the dnode_rele().
*/
dnode_rele(DB_DNODE(db), db);
mutex_enter(&dn->dn_mtx);
if (dn->dn_type == DMU_OT_NONE ||
dn->dn_free_txg != 0) {
/*
* Drop dn_mtx. It is a leaf lock and
* cannot be held when dnode_evict_bonus()
* acquires other locks in order to
* perform the eviction.
*
* Freed dnodes cannot be reused until the
* last hold is released. Since this bonus
* buffer has a hold, the dnode will remain
* in the free state, even without dn_mtx
* held, until the dnode_rele_and_unlock()
* below.
*/
mutex_exit(&dn->dn_mtx);
dnode_evict_bonus(dn);
mutex_enter(&dn->dn_mtx);
}
dnode_rele_and_unlock(dn, db);
} else if (db->db_buf == NULL) {
/*
* This is a special case: we never associated this

View File

@ -1157,13 +1157,19 @@ dnode_add_ref(dnode_t *dn, void *tag)
void
dnode_rele(dnode_t *dn, void *tag)
{
mutex_enter(&dn->dn_mtx);
dnode_rele_and_unlock(dn, tag);
}
void
dnode_rele_and_unlock(dnode_t *dn, void *tag)
{
uint64_t refs;
/* Get while the hold prevents the dnode from moving. */
dmu_buf_impl_t *db = dn->dn_dbuf;
dnode_handle_t *dnh = dn->dn_handle;
mutex_enter(&dn->dn_mtx);
refs = refcount_remove(&dn->dn_holds, tag);
mutex_exit(&dn->dn_mtx);

View File

@ -454,6 +454,12 @@ dnode_evict_dbufs(dnode_t *dn)
if (pass >= 100)
dprintf("Required %d passes to evict dbufs\n", pass);
dnode_evict_bonus(dn);
}
void
dnode_evict_bonus(dnode_t *dn)
{
rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
if (dn->dn_bonus && refcount_is_zero(&dn->dn_bonus->db_holds)) {
mutex_enter(&dn->dn_bonus->db_mtx);