@@ -385,9 +385,9 @@ static inline int hb_waiters_pending(struct futex_hash_bucket *hb)
385385 */
386386static struct futex_hash_bucket * hash_futex (union futex_key * key )
387387{
388- u32 hash = jhash2 ((u32 * )& key -> both .word ,
389- (sizeof (key -> both .word )+ sizeof (key -> both .ptr ))/4 ,
388+ u32 hash = jhash2 ((u32 * )key , offsetof(typeof (* key ), both .offset ) / 4 , /* hash the u32 words of *key that precede both.offset; "/ 4" turns the byte offset into a u32 count */
390389 key -> both .offset ); /* both.offset is passed separately as the final jhash2() argument */
390+
391391 return & futex_queues [hash & (futex_hashsize - 1 )]; /* mask the hash into a futex_queues[] index; presumably futex_hashsize is a power of two - confirm */
392392}
393393
@@ -429,7 +429,7 @@ static void get_futex_key_refs(union futex_key *key)
429429
430430 switch (key -> both .offset & (FUT_OFF_INODE |FUT_OFF_MMSHARED )) {
431431 case FUT_OFF_INODE :
432- ihold ( key -> shared . inode ); /* implies smp_mb(); (B) */
432+ smp_mb (); /* explicit smp_mb(); (B) */
433433 break ;
434434 case FUT_OFF_MMSHARED :
435435 futex_get_mm (key ); /* implies smp_mb(); (B) */
@@ -463,7 +463,6 @@ static void drop_futex_key_refs(union futex_key *key)
463463
464464 switch (key -> both .offset & (FUT_OFF_INODE |FUT_OFF_MMSHARED )) {
465465 case FUT_OFF_INODE :
466- iput (key -> shared .inode );
467466 break ;
468467 case FUT_OFF_MMSHARED :
469468 mmdrop (key -> private .mm );
@@ -505,6 +504,46 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
505504 return timeout ;
506505}
507506
507+ /*
508+ * Generate a machine wide unique identifier for this inode.
509+ *
510+ * This relies on u64 not wrapping in the life-time of the machine; even at
511+ * one increment per nanosecond that takes almost 585 years.
512+ *
513+ * This further relies on the fact that a well formed program will not unmap
514+ * the file while it has a (shared) futex waiting on it. This mapping will have
515+ * a file reference which pins the mount and inode.
516+ *
517+ * If for some reason an inode gets evicted and read back in again, it will get
518+ * a new sequence number and will _NOT_ match, even though it is the exact same
519+ * file.
520+ *
521+ * It is important that match_futex() will never have a false-positive, esp.
522+ * for PI futexes, where a false match can mess up the state. The above argues
523+ * that false-negatives are only possible for malformed programs.
524+ */
525+ static u64 get_inode_sequence_number (struct inode * inode )
526+ {
527+ static atomic64_t i_seq ; /* function-static counter shared by all callers; source of new sequence numbers */
528+ u64 old ;
529+
530+ /* Fast path: a non-zero i_sequence means a number was already assigned. */
531+ old = atomic64_read (& inode -> i_sequence );
532+ if (likely (old ))
533+ return old ;
534+
535+ for (;;) {
536+ u64 new = atomic64_add_return (1 , & i_seq );
537+ if (WARN_ON_ONCE (!new )) /* 0 is treated as "not yet assigned" above, so never hand it out; draw again */
538+ continue ;
539+
540+ old = atomic64_cmpxchg_relaxed (& inode -> i_sequence , 0 , new ); /* install new only if i_sequence is still 0 */
541+ if (old ) /* non-zero: a number was stored since the read above, so return that one */
542+ return old ;
543+ return new ;
544+ }
545+ }
546+
508547/**
509548 * get_futex_key() - Get parameters which are the keys for a futex
510549 * @uaddr: virtual address of the futex
@@ -517,9 +556,15 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
517556 *
518557 * The key words are stored in @key on success.
519558 *
520- * For shared mappings, it's (page->index, file_inode(vma->vm_file),
521- * offset_within_page). For private mappings, it's (uaddr, current->mm).
522- * We can usually work out the index without swapping in the page.
559+ * For shared mappings (when @fshared), the key is:
560+ * ( inode->i_sequence, page->index, offset_within_page )
561+ * [ also see get_inode_sequence_number() ]
562+ *
563+ * For private mappings (or when !@fshared), the key is:
564+ * ( current->mm, address, 0 )
565+ *
566+ * This allows (cross process, where applicable) identification of the futex
567+ * without keeping the page pinned for the duration of the FUTEX_WAIT.
523568 *
524569 * lock_page() might sleep, the caller should not hold a spinlock.
525570 */
@@ -659,8 +704,6 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, enum futex_a
659704 key -> private .mm = mm ;
660705 key -> private .address = address ;
661706
662- get_futex_key_refs (key ); /* implies smp_mb(); (B) */
663-
664707 } else {
665708 struct inode * inode ;
666709
@@ -692,40 +735,14 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, enum futex_a
692735 goto again ;
693736 }
694737
695- /*
696- * Take a reference unless it is about to be freed. Previously
697- * this reference was taken by ihold under the page lock
698- * pinning the inode in place so i_lock was unnecessary. The
699- * only way for this check to fail is if the inode was
700- * truncated in parallel which is almost certainly an
701- * application bug. In such a case, just retry.
702- *
703- * We are not calling into get_futex_key_refs() in file-backed
704- * cases, therefore a successful atomic_inc return below will
705- * guarantee that get_futex_key() will still imply smp_mb(); (B).
706- */
707- if (!atomic_inc_not_zero (& inode -> i_count )) {
708- rcu_read_unlock ();
709- put_page (page );
710-
711- goto again ;
712- }
713-
714- /* Should be impossible but lets be paranoid for now */
715- if (WARN_ON_ONCE (inode -> i_mapping != mapping )) {
716- err = - EFAULT ;
717- rcu_read_unlock ();
718- iput (inode );
719-
720- goto out ;
721- }
722-
723738 key -> both .offset |= FUT_OFF_INODE ; /* inode-based key */
724- key -> shared .inode = inode ;
739+ key -> shared .i_seq = get_inode_sequence_number ( inode ) ;
725740 key -> shared .pgoff = basepage_index (tail );
726741 rcu_read_unlock ();
727742 }
728743
744+ get_futex_key_refs (key ); /* implies smp_mb(); (B) */
745+
729746out :
730747 put_page (page );
731748 return err ;
0 commit comments