rt/emul/compact/src/main/java/java/util/concurrent/Exchanger.java
author Jaroslav Tulach <jaroslav.tulach@apidesign.org>
Sat, 19 Mar 2016 12:51:03 +0100
changeset 1895 bfaf3300b7ba
parent 1890 212417b74b72
permissions -rw-r--r--
Making java.util.concurrent package compilable except ForkJoinPool
jaroslav@1890
     1
/*
jaroslav@1890
     2
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
jaroslav@1890
     3
 *
jaroslav@1890
     4
 * This code is free software; you can redistribute it and/or modify it
jaroslav@1890
     5
 * under the terms of the GNU General Public License version 2 only, as
jaroslav@1890
     6
 * published by the Free Software Foundation.  Oracle designates this
jaroslav@1890
     7
 * particular file as subject to the "Classpath" exception as provided
jaroslav@1890
     8
 * by Oracle in the LICENSE file that accompanied this code.
jaroslav@1890
     9
 *
jaroslav@1890
    10
 * This code is distributed in the hope that it will be useful, but WITHOUT
jaroslav@1890
    11
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
jaroslav@1890
    12
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
jaroslav@1890
    13
 * version 2 for more details (a copy is included in the LICENSE file that
jaroslav@1890
    14
 * accompanied this code).
jaroslav@1890
    15
 *
jaroslav@1890
    16
 * You should have received a copy of the GNU General Public License version
jaroslav@1890
    17
 * 2 along with this work; if not, write to the Free Software Foundation,
jaroslav@1890
    18
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
jaroslav@1890
    19
 *
jaroslav@1890
    20
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
jaroslav@1890
    21
 * or visit www.oracle.com if you need additional information or have any
jaroslav@1890
    22
 * questions.
jaroslav@1890
    23
 */
jaroslav@1890
    24
jaroslav@1890
    25
/*
jaroslav@1890
    26
 * This file is available under and governed by the GNU General Public
jaroslav@1890
    27
 * License version 2 only, as published by the Free Software Foundation.
jaroslav@1890
    28
 * However, the following notice accompanied the original version of this
jaroslav@1890
    29
 * file:
jaroslav@1890
    30
 *
jaroslav@1890
    31
 * Written by Doug Lea, Bill Scherer, and Michael Scott with
jaroslav@1890
    32
 * assistance from members of JCP JSR-166 Expert Group and released to
jaroslav@1890
    33
 * the public domain, as explained at
jaroslav@1890
    34
 * http://creativecommons.org/publicdomain/zero/1.0/
jaroslav@1890
    35
 */
jaroslav@1890
    36
jaroslav@1890
    37
package java.util.concurrent;
jaroslav@1890
    38
import java.util.concurrent.atomic.*;
jaroslav@1890
    39
import java.util.concurrent.locks.LockSupport;
jaroslav@1890
    40
jaroslav@1890
    41
/**
jaroslav@1890
    42
 * A synchronization point at which threads can pair and swap elements
jaroslav@1890
    43
 * within pairs.  Each thread presents some object on entry to the
jaroslav@1890
    44
 * {@link #exchange exchange} method, matches with a partner thread,
jaroslav@1890
    45
 * and receives its partner's object on return.  An Exchanger may be
jaroslav@1890
    46
 * viewed as a bidirectional form of a {@link SynchronousQueue}.
jaroslav@1890
    47
 * Exchangers may be useful in applications such as genetic algorithms
jaroslav@1890
    48
 * and pipeline designs.
jaroslav@1890
    49
 *
jaroslav@1890
    50
 * <p><b>Sample Usage:</b>
jaroslav@1890
    51
 * Here are the highlights of a class that uses an {@code Exchanger}
jaroslav@1890
    52
 * to swap buffers between threads so that the thread filling the
jaroslav@1890
    53
 * buffer gets a freshly emptied one when it needs it, handing off the
jaroslav@1890
    54
 * filled one to the thread emptying the buffer.
jaroslav@1890
    55
 * <pre>{@code
jaroslav@1890
    56
 * class FillAndEmpty {
jaroslav@1890
    57
 *   Exchanger<DataBuffer> exchanger = new Exchanger<DataBuffer>();
jaroslav@1890
    58
 *   DataBuffer initialEmptyBuffer = ... a made-up type
jaroslav@1890
    59
 *   DataBuffer initialFullBuffer = ...
jaroslav@1890
    60
 *
jaroslav@1890
    61
 *   class FillingLoop implements Runnable {
jaroslav@1890
    62
 *     public void run() {
jaroslav@1890
    63
 *       DataBuffer currentBuffer = initialEmptyBuffer;
jaroslav@1890
    64
 *       try {
jaroslav@1890
    65
 *         while (currentBuffer != null) {
jaroslav@1890
    66
 *           addToBuffer(currentBuffer);
jaroslav@1890
    67
 *           if (currentBuffer.isFull())
jaroslav@1890
    68
 *             currentBuffer = exchanger.exchange(currentBuffer);
jaroslav@1890
    69
 *         }
jaroslav@1890
    70
 *       } catch (InterruptedException ex) { ... handle ... }
jaroslav@1890
    71
 *     }
jaroslav@1890
    72
 *   }
jaroslav@1890
    73
 *
jaroslav@1890
    74
 *   class EmptyingLoop implements Runnable {
jaroslav@1890
    75
 *     public void run() {
jaroslav@1890
    76
 *       DataBuffer currentBuffer = initialFullBuffer;
jaroslav@1890
    77
 *       try {
jaroslav@1890
    78
 *         while (currentBuffer != null) {
jaroslav@1890
    79
 *           takeFromBuffer(currentBuffer);
jaroslav@1890
    80
 *           if (currentBuffer.isEmpty())
jaroslav@1890
    81
 *             currentBuffer = exchanger.exchange(currentBuffer);
jaroslav@1890
    82
 *         }
jaroslav@1890
    83
 *       } catch (InterruptedException ex) { ... handle ...}
jaroslav@1890
    84
 *     }
jaroslav@1890
    85
 *   }
jaroslav@1890
    86
 *
jaroslav@1890
    87
 *   void start() {
jaroslav@1890
    88
 *     new Thread(new FillingLoop()).start();
jaroslav@1890
    89
 *     new Thread(new EmptyingLoop()).start();
jaroslav@1890
    90
 *   }
jaroslav@1890
    91
 * }
jaroslav@1890
    92
 * }</pre>
jaroslav@1890
    93
 *
jaroslav@1890
    94
 * <p>Memory consistency effects: For each pair of threads that
jaroslav@1890
    95
 * successfully exchange objects via an {@code Exchanger}, actions
jaroslav@1890
    96
 * prior to the {@code exchange()} in each thread
jaroslav@1890
    97
 * <a href="package-summary.html#MemoryVisibility"><i>happen-before</i></a>
jaroslav@1890
    98
 * those subsequent to a return from the corresponding {@code exchange()}
jaroslav@1890
    99
 * in the other thread.
jaroslav@1890
   100
 *
jaroslav@1890
   101
 * @since 1.5
jaroslav@1890
   102
 * @author Doug Lea and Bill Scherer and Michael Scott
jaroslav@1890
   103
 * @param <V> The type of objects that may be exchanged
jaroslav@1890
   104
 */
jaroslav@1890
   105
public class Exchanger<V> {
jaroslav@1890
   106
    /*
jaroslav@1890
   107
     * Algorithm Description:
jaroslav@1890
   108
     *
jaroslav@1890
   109
     * The basic idea is to maintain a "slot", which is a reference to
jaroslav@1890
   110
     * a Node containing both an Item to offer and a "hole" waiting to
jaroslav@1890
   111
     * get filled in.  If an incoming "occupying" thread sees that the
jaroslav@1890
   112
     * slot is null, it CAS'es (compareAndSets) a Node there and waits
jaroslav@1890
   113
     * for another to invoke exchange.  That second "fulfilling" thread
jaroslav@1890
   114
     * sees that the slot is non-null, and so CASes it back to null,
jaroslav@1890
   115
     * also exchanging items by CASing the hole, plus waking up the
jaroslav@1890
   116
     * occupying thread if it is blocked.  In each case CAS'es may
jaroslav@1890
   117
     * fail because a slot at first appears non-null but is null upon
jaroslav@1890
   118
     * CAS, or vice-versa.  So threads may need to retry these
jaroslav@1890
   119
     * actions.
jaroslav@1890
   120
     *
jaroslav@1890
   121
     * This simple approach works great when there are only a few
jaroslav@1890
   122
     * threads using an Exchanger, but performance rapidly
jaroslav@1890
   123
     * deteriorates due to CAS contention on the single slot when
jaroslav@1890
   124
     * there are lots of threads using an exchanger.  So instead we use
jaroslav@1890
   125
     * an "arena"; basically a kind of hash table with a dynamically
jaroslav@1890
   126
     * varying number of slots, any one of which can be used by
jaroslav@1890
   127
     * threads performing an exchange.  Incoming threads pick slots
jaroslav@1890
   128
     * based on a hash of their Thread ids.  If an incoming thread
jaroslav@1890
   129
     * fails to CAS in its chosen slot, it picks an alternative slot
jaroslav@1890
   130
     * instead.  And similarly from there.  If a thread successfully
jaroslav@1890
   131
     * CASes into a slot but no other thread arrives, it tries
jaroslav@1890
   132
     * another, heading toward the zero slot, which always exists even
jaroslav@1890
   133
     * if the table shrinks.  The particular mechanics controlling this
jaroslav@1890
   134
     * are as follows:
jaroslav@1890
   135
     *
jaroslav@1890
   136
     * Waiting: Slot zero is special in that it is the only slot that
jaroslav@1890
   137
     * exists when there is no contention.  A thread occupying slot
jaroslav@1890
   138
     * zero will block if no thread fulfills it after a short spin.
jaroslav@1890
   139
     * In other cases, occupying threads eventually give up and try
jaroslav@1890
   140
     * another slot.  Waiting threads spin for a while (a period that
jaroslav@1890
   141
     * should be a little less than a typical context-switch time)
jaroslav@1890
   142
     * before either blocking (if slot zero) or giving up (if other
jaroslav@1890
   143
     * slots) and restarting.  There is no reason for threads to block
jaroslav@1890
   144
     * unless there are unlikely to be any other threads present.
jaroslav@1890
   145
     * Occupants are mainly avoiding memory contention so sit there
jaroslav@1890
   146
     * quietly polling for a shorter period than it would take to
jaroslav@1890
   147
     * block and then unblock them.  Non-slot-zero waits that elapse
jaroslav@1890
   148
     * because of lack of other threads waste around one extra
jaroslav@1890
   149
     * context-switch time per try, which is still on average much
jaroslav@1890
   150
     * faster than alternative approaches.
jaroslav@1890
   151
     *
jaroslav@1890
   152
     * Sizing: Usually, using only a few slots suffices to reduce
jaroslav@1890
   153
     * contention.  Especially with small numbers of threads, using
jaroslav@1890
   154
     * too many slots can lead to just as poor performance as using
jaroslav@1890
   155
     * too few of them, and there's not much room for error.  The
jaroslav@1890
   156
     * variable "max" maintains the number of slots actually in
jaroslav@1890
   157
     * use.  It is increased when a thread sees too many CAS
jaroslav@1890
   158
     * failures.  (This is analogous to resizing a regular hash table
jaroslav@1890
   159
     * based on a target load factor, except here, growth steps are
jaroslav@1890
   160
     * just one-by-one rather than proportional.)  Growth requires
jaroslav@1890
   161
     * contention failures in each of three tried slots.  Requiring
jaroslav@1890
   162
     * multiple failures for expansion copes with the fact that some
jaroslav@1890
   163
     * failed CASes are not due to contention but instead to simple
jaroslav@1890
   164
     * races between two threads or thread pre-emptions occurring
jaroslav@1890
   165
     * between reading and CASing.  Also, very transient peak
jaroslav@1890
   166
     * contention can be much higher than the average sustainable
jaroslav@1890
   167
     * levels.  An attempt to decrease the max limit is usually made
jaroslav@1890
   168
     * when a non-slot-zero wait elapses without being fulfilled.
jaroslav@1890
   169
     * Threads experiencing elapsed waits move closer to zero, so
jaroslav@1890
   170
     * eventually find existing (or future) threads even if the table
jaroslav@1890
   171
     * has been shrunk due to inactivity.  The chosen mechanics and
jaroslav@1890
   172
     * thresholds for growing and shrinking are intrinsically
jaroslav@1890
   173
     * entangled with indexing and hashing inside the exchange code,
jaroslav@1890
   174
     * and can't be nicely abstracted out.
jaroslav@1890
   175
     *
jaroslav@1890
   176
     * Hashing: Each thread picks its initial slot to use in accord
jaroslav@1890
   177
     * with a simple hashcode.  The sequence is the same on each
jaroslav@1890
   178
     * encounter by any given thread, but effectively random across
jaroslav@1890
   179
     * threads.  Using arenas encounters the classic cost vs quality
jaroslav@1890
   180
     * tradeoffs of all hash tables.  Here, we use a one-step FNV-1a
jaroslav@1890
   181
     * hash code based on the current thread's Thread.getId(), along
jaroslav@1890
   182
     * with a cheap approximation to a mod operation to select an
jaroslav@1890
   183
     * index.  The downside of optimizing index selection in this way
jaroslav@1890
   184
     * is that the code is hardwired to use a maximum table size of
jaroslav@1890
   185
     * 32.  But this value more than suffices for known platforms and
jaroslav@1890
   186
     * applications.
jaroslav@1890
   187
     *
jaroslav@1890
   188
     * Probing: On sensed contention of a selected slot, we probe
jaroslav@1890
   189
     * sequentially through the table, analogously to linear probing
jaroslav@1890
   190
     * after collision in a hash table.  (We move circularly, in
jaroslav@1890
   191
     * reverse order, to mesh best with table growth and shrinkage
jaroslav@1890
   192
     * rules.)  Except that to minimize the effects of false-alarms
jaroslav@1890
   193
     * and cache thrashing, we try the first selected slot twice
jaroslav@1890
   194
     * before moving.
jaroslav@1890
   195
     *
jaroslav@1890
   196
     * Padding: Even with contention management, slots are heavily
jaroslav@1890
   197
     * contended, so use cache-padding to avoid poor memory
jaroslav@1890
   198
     * performance.  Because of this, slots are lazily constructed
jaroslav@1890
   199
     * only when used, to avoid wasting this space unnecessarily.
jaroslav@1890
   200
     * While isolation of locations is not much of an issue at first
jaroslav@1890
   201
     * in an application, as time goes on and garbage-collectors
jaroslav@1890
   202
     * perform compaction, slots are very likely to be moved adjacent
jaroslav@1890
   203
     * to each other, which can cause much thrashing of cache lines on
jaroslav@1890
   204
     * MPs unless padding is employed.
jaroslav@1890
   205
     *
jaroslav@1890
   206
     * This is an improvement of the algorithm described in the paper
jaroslav@1890
   207
     * "A Scalable Elimination-based Exchange Channel" by William
jaroslav@1890
   208
     * Scherer, Doug Lea, and Michael Scott in Proceedings of SCOOL05
jaroslav@1890
   209
     * workshop.  Available at: http://hdl.handle.net/1802/2104
jaroslav@1890
   210
     */
jaroslav@1890
   211
jaroslav@1890
   212
    /** The number of CPUs, for sizing and spin control */
jaroslav@1895
   213
    private static final int NCPU = 1;
jaroslav@1890
   214
jaroslav@1890
   215
    /**
jaroslav@1890
   216
     * The capacity of the arena.  Set to a value that provides more
jaroslav@1890
   217
     * than enough space to handle contention.  On small machines
jaroslav@1890
   218
     * most slots won't be used, but it is still not wasted because
jaroslav@1890
   219
     * the extra space provides some machine-level address padding
jaroslav@1890
   220
     * to minimize interference with heavily CAS'ed Slot locations.
jaroslav@1890
   221
     * And on very large machines, performance eventually becomes
jaroslav@1890
   222
     * bounded by memory bandwidth, not numbers of threads/CPUs.
jaroslav@1890
   223
     * This constant cannot be changed without also modifying
jaroslav@1890
   224
     * indexing and hashing algorithms.
jaroslav@1890
   225
     */
jaroslav@1890
   226
    private static final int CAPACITY = 32;
jaroslav@1890
   227
jaroslav@1890
   228
    /**
jaroslav@1890
   229
     * The value of "max" that will hold all threads without
jaroslav@1890
   230
     * contention.  When this value is less than CAPACITY, some
jaroslav@1890
   231
     * otherwise wasted expansion can be avoided.
jaroslav@1890
   232
     */
jaroslav@1890
   233
    private static final int FULL =
jaroslav@1890
   234
        Math.max(0, Math.min(CAPACITY, NCPU / 2) - 1);
jaroslav@1890
   235
jaroslav@1890
   236
    /**
jaroslav@1890
   237
     * The number of times to spin (doing nothing except polling a
jaroslav@1890
   238
     * memory location) before blocking or giving up while waiting to
jaroslav@1890
   239
     * be fulfilled.  Should be zero on uniprocessors.  On
jaroslav@1890
   240
     * multiprocessors, this value should be large enough so that two
jaroslav@1890
   241
     * threads exchanging items as fast as possible block only when
jaroslav@1890
   242
     * one of them is stalled (due to GC or preemption), but not much
jaroslav@1890
   243
     * longer, to avoid wasting CPU resources.  Seen differently, this
jaroslav@1890
   244
     * value is a little over half the number of cycles of an average
jaroslav@1890
   245
     * context switch time on most systems.  The value here is
jaroslav@1890
   246
     * approximately the average of those across a range of tested
jaroslav@1890
   247
     * systems.
jaroslav@1890
   248
     */
jaroslav@1890
   249
    private static final int SPINS = (NCPU == 1) ? 0 : 2000;
jaroslav@1890
   250
jaroslav@1890
   251
    /**
jaroslav@1890
   252
     * The number of times to spin before blocking in timed waits.
jaroslav@1890
   253
     * Timed waits spin more slowly because checking the time takes
jaroslav@1890
   254
     * time.  The best value relies mainly on the relative rate of
jaroslav@1890
   255
     * System.nanoTime vs memory accesses.  The value is empirically
jaroslav@1890
   256
     * derived to work well across a variety of systems.
jaroslav@1890
   257
     */
jaroslav@1890
   258
    private static final int TIMED_SPINS = SPINS / 20;
jaroslav@1890
   259
jaroslav@1890
   260
    /**
jaroslav@1890
   261
     * Sentinel item representing cancellation of a wait due to
jaroslav@1890
   262
     * interruption, timeout, or elapsed spin-waits.  This value is
jaroslav@1890
   263
     * placed in holes on cancellation, and used as a return value
jaroslav@1890
   264
     * from waiting methods to indicate failure to set or get hole.
jaroslav@1890
   265
     */
jaroslav@1890
   266
    private static final Object CANCEL = new Object();
jaroslav@1890
   267
jaroslav@1890
   268
    /**
jaroslav@1890
   269
     * Value representing null arguments/returns from public
jaroslav@1890
   270
     * methods.  This disambiguates from internal requirement that
jaroslav@1890
   271
     * holes start out as null to mean they are not yet set.
jaroslav@1890
   272
     */
jaroslav@1890
   273
    private static final Object NULL_ITEM = new Object();
jaroslav@1890
   274
jaroslav@1890
   275
    /**
jaroslav@1890
   276
     * Nodes hold partially exchanged data.  This class
jaroslav@1890
   277
     * opportunistically subclasses AtomicReference to represent the
jaroslav@1890
   278
     * hole.  So get() returns hole, and compareAndSet CAS'es value
jaroslav@1890
   279
     * into hole.  This class cannot be parameterized as "V" because
jaroslav@1890
   280
     * of the use of non-V CANCEL sentinels.
jaroslav@1890
   281
     */
jaroslav@1890
   282
    private static final class Node extends AtomicReference<Object> {
jaroslav@1890
   283
        /** The element offered by the Thread creating this node. */
jaroslav@1890
   284
        public final Object item;
jaroslav@1890
   285
jaroslav@1890
   286
        /** The Thread waiting to be signalled; null until waiting. */
jaroslav@1890
   287
        public volatile Thread waiter;
jaroslav@1890
   288
jaroslav@1890
   289
        /**
jaroslav@1890
   290
         * Creates node with given item and empty hole.
jaroslav@1890
   291
         * @param item the item
jaroslav@1890
   292
         */
jaroslav@1890
   293
        public Node(Object item) {
jaroslav@1890
   294
            this.item = item;
jaroslav@1890
   295
        }
jaroslav@1890
   296
    }
jaroslav@1890
   297
jaroslav@1890
   298
    /**
jaroslav@1890
   299
     * A Slot is an AtomicReference with heuristic padding to lessen
jaroslav@1890
   300
     * cache effects of this heavily CAS'ed location.  While the
jaroslav@1890
   301
     * padding adds noticeable space, all slots are created only on
jaroslav@1890
   302
     * demand, and there will be more than one of them only when it
jaroslav@1890
   303
     * would improve throughput more than enough to outweigh using
jaroslav@1890
   304
     * extra space.
jaroslav@1890
   305
     */
jaroslav@1890
   306
    private static final class Slot extends AtomicReference<Object> {
jaroslav@1890
   307
        // Improve likelihood of isolation on <= 64 byte cache lines
jaroslav@1890
   308
        long q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, qa, qb, qc, qd, qe;
jaroslav@1890
   309
    }
jaroslav@1890
   310
jaroslav@1890
   311
    /**
jaroslav@1890
   312
     * Slot array.  Elements are lazily initialized when needed.
jaroslav@1890
   313
     * Declared volatile to enable double-checked lazy construction.
jaroslav@1890
   314
     */
jaroslav@1890
   315
    private volatile Slot[] arena = new Slot[CAPACITY];
jaroslav@1890
   316
jaroslav@1890
   317
    /**
jaroslav@1890
   318
     * The maximum slot index being used.  The value sometimes
jaroslav@1890
   319
     * increases when a thread experiences too many CAS contentions,
jaroslav@1890
   320
     * and sometimes decreases when a spin-wait elapses.  Changes
jaroslav@1890
   321
     * are performed only via compareAndSet, to avoid stale values
jaroslav@1890
   322
     * when a thread happens to stall right before setting.
jaroslav@1890
   323
     */
jaroslav@1890
   324
    private final AtomicInteger max = new AtomicInteger();
jaroslav@1890
   325
jaroslav@1890
   326
    /**
jaroslav@1890
   327
     * Main exchange function, handling the different policy variants.
jaroslav@1890
   328
     * Uses Object, not "V" as argument and return value to simplify
jaroslav@1890
   329
     * handling of sentinel values.  Callers from public methods decode
jaroslav@1890
   330
     * and cast accordingly.
jaroslav@1890
   331
     *
jaroslav@1890
   332
     * @param item the (non-null) item to exchange
jaroslav@1890
   333
     * @param timed true if the wait is timed
jaroslav@1890
   334
     * @param nanos if timed, the maximum wait time
jaroslav@1890
   335
     * @return the other thread's item, or CANCEL if interrupted or timed out
jaroslav@1890
   336
     */
jaroslav@1890
   337
    private Object doExchange(Object item, boolean timed, long nanos) {
jaroslav@1890
   338
        Node me = new Node(item);                 // Create in case occupying
jaroslav@1890
   339
        int index = hashIndex();                  // Index of current slot
jaroslav@1890
   340
        int fails = 0;                            // Number of CAS failures
jaroslav@1890
   341
jaroslav@1890
   342
        for (;;) {
jaroslav@1890
   343
            Object y;                             // Contents of current slot
jaroslav@1890
   344
            Slot slot = arena[index];
jaroslav@1890
   345
            if (slot == null)                     // Lazily initialize slots
jaroslav@1890
   346
                createSlot(index);                // Continue loop to reread
jaroslav@1890
   347
            else if ((y = slot.get()) != null &&  // Try to fulfill
jaroslav@1890
   348
                     slot.compareAndSet(y, null)) {
jaroslav@1890
   349
                Node you = (Node)y;               // Transfer item
jaroslav@1890
   350
                if (you.compareAndSet(null, item)) {
jaroslav@1890
   351
                    LockSupport.unpark(you.waiter);
jaroslav@1890
   352
                    return you.item;
jaroslav@1890
   353
                }                                 // Else cancelled; continue
jaroslav@1890
   354
            }
jaroslav@1890
   355
            else if (y == null &&                 // Try to occupy
jaroslav@1890
   356
                     slot.compareAndSet(null, me)) {
jaroslav@1890
   357
                if (index == 0)                   // Blocking wait for slot 0
jaroslav@1890
   358
                    return timed ?
jaroslav@1890
   359
                        awaitNanos(me, slot, nanos) :
jaroslav@1890
   360
                        await(me, slot);
jaroslav@1890
   361
                Object v = spinWait(me, slot);    // Spin wait for non-0
jaroslav@1890
   362
                if (v != CANCEL)
jaroslav@1890
   363
                    return v;
jaroslav@1890
   364
                me = new Node(item);              // Throw away cancelled node
jaroslav@1890
   365
                int m = max.get();
jaroslav@1890
   366
                if (m > (index >>>= 1))           // Decrease index
jaroslav@1890
   367
                    max.compareAndSet(m, m - 1);  // Maybe shrink table
jaroslav@1890
   368
            }
jaroslav@1890
   369
            else if (++fails > 1) {               // Allow 2 fails on 1st slot
jaroslav@1890
   370
                int m = max.get();
jaroslav@1890
   371
                if (fails > 3 && m < FULL && max.compareAndSet(m, m + 1))
jaroslav@1890
   372
                    index = m + 1;                // Grow on 3rd failed slot
jaroslav@1890
   373
                else if (--index < 0)
jaroslav@1890
   374
                    index = m;                    // Circularly traverse
jaroslav@1890
   375
            }
jaroslav@1890
   376
        }
jaroslav@1890
   377
    }
jaroslav@1890
   378
jaroslav@1890
   379
    /**
jaroslav@1890
   380
     * Returns a hash index for the current thread.  Uses a one-step
jaroslav@1890
   381
     * FNV-1a hash code (http://www.isthe.com/chongo/tech/comp/fnv/)
jaroslav@1890
   382
     * based on the current thread's Thread.getId().  These hash codes
jaroslav@1890
   383
     * have more uniform distribution properties with respect to small
jaroslav@1890
   384
     * moduli (here 1-31) than do other simple hashing functions.
jaroslav@1890
   385
     *
jaroslav@1890
   386
     * <p>To return an index between 0 and max, we use a cheap
jaroslav@1890
   387
     * approximation to a mod operation, that also corrects for bias
jaroslav@1890
   388
     * due to non-power-of-2 remaindering (see {@link
jaroslav@1890
   389
     * java.util.Random#nextInt}).  Bits of the hashcode are masked
jaroslav@1890
   390
     * with "nbits", the ceiling power of two of table size (looked up
jaroslav@1890
   391
     * in a table packed into three ints).  If too large, this is
jaroslav@1890
   392
     * retried after rotating the hash by nbits bits, while forcing new
jaroslav@1890
   393
     * top bit to 0, which guarantees eventual termination (although
jaroslav@1890
   394
     * with a non-random-bias).  This requires an average of less than
jaroslav@1890
   395
     * 2 tries for all table sizes, and has a maximum 2% difference
jaroslav@1890
   396
     * from perfectly uniform slot probabilities when applied to all
jaroslav@1890
   397
     * possible hash codes for sizes less than 32.
jaroslav@1890
   398
     *
jaroslav@1890
   399
     * @return a per-thread-random index, 0 <= index < max
jaroslav@1890
   400
     */
jaroslav@1890
   401
    private final int hashIndex() {
jaroslav@1890
   402
        long id = Thread.currentThread().getId();
jaroslav@1890
   403
        int hash = (((int)(id ^ (id >>> 32))) ^ 0x811c9dc5) * 0x01000193;
jaroslav@1890
   404
jaroslav@1890
   405
        int m = max.get();
jaroslav@1890
   406
        int nbits = (((0xfffffc00  >> m) & 4) | // Compute ceil(log2(m+1))
jaroslav@1890
   407
                     ((0x000001f8 >>> m) & 2) | // The constants hold
jaroslav@1890
   408
                     ((0xffff00f2 >>> m) & 1)); // a lookup table
jaroslav@1890
   409
        int index;
jaroslav@1890
   410
        while ((index = hash & ((1 << nbits) - 1)) > m)       // May retry on
jaroslav@1890
   411
            hash = (hash >>> nbits) | (hash << (33 - nbits)); // non-power-2 m
jaroslav@1890
   412
        return index;
jaroslav@1890
   413
    }
jaroslav@1890
   414
jaroslav@1890
   415
    /**
jaroslav@1890
   416
     * Creates a new slot at given index.  Called only when the slot
jaroslav@1890
   417
     * appears to be null.  Relies on double-check using builtin
jaroslav@1890
   418
     * locks, since they rarely contend.  This in turn relies on the
jaroslav@1890
   419
     * arena array being declared volatile.
jaroslav@1890
   420
     *
jaroslav@1890
   421
     * @param index the index to add slot at
jaroslav@1890
   422
     */
jaroslav@1890
   423
    private void createSlot(int index) {
jaroslav@1890
   424
        // Create slot outside of lock to narrow sync region
jaroslav@1890
   425
        Slot newSlot = new Slot();
jaroslav@1890
   426
        Slot[] a = arena;
jaroslav@1890
   427
        synchronized (a) {
jaroslav@1890
   428
            if (a[index] == null)
jaroslav@1890
   429
                a[index] = newSlot;
jaroslav@1890
   430
        }
jaroslav@1890
   431
    }
jaroslav@1890
   432
jaroslav@1890
   433
    /**
jaroslav@1890
   434
     * Tries to cancel a wait for the given node waiting in the given
jaroslav@1890
   435
     * slot, if so, helping clear the node from its slot to avoid
jaroslav@1890
   436
     * garbage retention.
jaroslav@1890
   437
     *
jaroslav@1890
   438
     * @param node the waiting node
jaroslav@1890
   439
     * @param the slot it is waiting in
jaroslav@1890
   440
     * @return true if successfully cancelled
jaroslav@1890
   441
     */
jaroslav@1890
   442
    private static boolean tryCancel(Node node, Slot slot) {
jaroslav@1890
   443
        if (!node.compareAndSet(null, CANCEL))
jaroslav@1890
   444
            return false;
jaroslav@1890
   445
        if (slot.get() == node) // pre-check to minimize contention
jaroslav@1890
   446
            slot.compareAndSet(node, null);
jaroslav@1890
   447
        return true;
jaroslav@1890
   448
    }
jaroslav@1890
   449
jaroslav@1890
   450
    // Three forms of waiting. Each just different enough not to merge
jaroslav@1890
   451
    // code with others.
jaroslav@1890
   452
jaroslav@1890
   453
    /**
jaroslav@1890
   454
     * Spin-waits for hole for a non-0 slot.  Fails if spin elapses
jaroslav@1890
   455
     * before hole filled.  Does not check interrupt, relying on check
jaroslav@1890
   456
     * in public exchange method to abort if interrupted on entry.
jaroslav@1890
   457
     *
jaroslav@1890
   458
     * @param node the waiting node
jaroslav@1890
   459
     * @return on success, the hole; on failure, CANCEL
jaroslav@1890
   460
     */
jaroslav@1890
   461
    private static Object spinWait(Node node, Slot slot) {
jaroslav@1890
   462
        int spins = SPINS;
jaroslav@1890
   463
        for (;;) {
jaroslav@1890
   464
            Object v = node.get();
jaroslav@1890
   465
            if (v != null)
jaroslav@1890
   466
                return v;
jaroslav@1890
   467
            else if (spins > 0)
jaroslav@1890
   468
                --spins;
jaroslav@1890
   469
            else
jaroslav@1890
   470
                tryCancel(node, slot);
jaroslav@1890
   471
        }
jaroslav@1890
   472
    }
jaroslav@1890
   473
jaroslav@1890
   474
    /**
jaroslav@1890
   475
     * Waits for (by spinning and/or blocking) and gets the hole
jaroslav@1890
   476
     * filled in by another thread.  Fails if interrupted before
jaroslav@1890
   477
     * hole filled.
jaroslav@1890
   478
     *
jaroslav@1890
   479
     * When a node/thread is about to block, it sets its waiter field
jaroslav@1890
   480
     * and then rechecks state at least one more time before actually
jaroslav@1890
   481
     * parking, thus covering race vs fulfiller noticing that waiter
jaroslav@1890
   482
     * is non-null so should be woken.
jaroslav@1890
   483
     *
jaroslav@1890
   484
     * Thread interruption status is checked only surrounding calls to
jaroslav@1890
   485
     * park.  The caller is assumed to have checked interrupt status
jaroslav@1890
   486
     * on entry.
jaroslav@1890
   487
     *
jaroslav@1890
   488
     * @param node the waiting node
jaroslav@1890
   489
     * @return on success, the hole; on failure, CANCEL
jaroslav@1890
   490
     */
jaroslav@1890
   491
    private static Object await(Node node, Slot slot) {
jaroslav@1890
   492
        Thread w = Thread.currentThread();
jaroslav@1890
   493
        int spins = SPINS;
jaroslav@1890
   494
        for (;;) {
jaroslav@1890
   495
            Object v = node.get();
jaroslav@1890
   496
            if (v != null)
jaroslav@1890
   497
                return v;
jaroslav@1890
   498
            else if (spins > 0)                 // Spin-wait phase
jaroslav@1890
   499
                --spins;
jaroslav@1890
   500
            else if (node.waiter == null)       // Set up to block next
jaroslav@1890
   501
                node.waiter = w;
jaroslav@1890
   502
            else if (w.isInterrupted())         // Abort on interrupt
jaroslav@1890
   503
                tryCancel(node, slot);
jaroslav@1890
   504
            else                                // Block
jaroslav@1890
   505
                LockSupport.park(node);
jaroslav@1890
   506
        }
jaroslav@1890
   507
    }
jaroslav@1890
   508
jaroslav@1890
   509
    /**
jaroslav@1890
   510
     * Waits for (at index 0) and gets the hole filled in by another
jaroslav@1890
   511
     * thread.  Fails if timed out or interrupted before hole filled.
jaroslav@1890
   512
     * Same basic logic as untimed version, but a bit messier.
jaroslav@1890
   513
     *
jaroslav@1890
   514
     * @param node the waiting node
jaroslav@1890
   515
     * @param nanos the wait time
jaroslav@1890
   516
     * @return on success, the hole; on failure, CANCEL
jaroslav@1890
   517
     */
jaroslav@1890
   518
    private Object awaitNanos(Node node, Slot slot, long nanos) {
jaroslav@1890
   519
        int spins = TIMED_SPINS;
jaroslav@1890
   520
        long lastTime = 0;
jaroslav@1890
   521
        Thread w = null;
jaroslav@1890
   522
        for (;;) {
jaroslav@1890
   523
            Object v = node.get();
jaroslav@1890
   524
            if (v != null)
jaroslav@1890
   525
                return v;
jaroslav@1890
   526
            long now = System.nanoTime();
jaroslav@1890
   527
            if (w == null)
jaroslav@1890
   528
                w = Thread.currentThread();
jaroslav@1890
   529
            else
jaroslav@1890
   530
                nanos -= now - lastTime;
jaroslav@1890
   531
            lastTime = now;
jaroslav@1890
   532
            if (nanos > 0) {
jaroslav@1890
   533
                if (spins > 0)
jaroslav@1890
   534
                    --spins;
jaroslav@1890
   535
                else if (node.waiter == null)
jaroslav@1890
   536
                    node.waiter = w;
jaroslav@1890
   537
                else if (w.isInterrupted())
jaroslav@1890
   538
                    tryCancel(node, slot);
jaroslav@1890
   539
                else
jaroslav@1890
   540
                    LockSupport.parkNanos(node, nanos);
jaroslav@1890
   541
            }
jaroslav@1890
   542
            else if (tryCancel(node, slot) && !w.isInterrupted())
jaroslav@1890
   543
                return scanOnTimeout(node);
jaroslav@1890
   544
        }
jaroslav@1890
   545
    }
jaroslav@1890
   546
jaroslav@1890
   547
    /**
jaroslav@1890
   548
     * Sweeps through arena checking for any waiting threads.  Called
jaroslav@1890
   549
     * only upon return from timeout while waiting in slot 0.  When a
jaroslav@1890
   550
     * thread gives up on a timed wait, it is possible that a
jaroslav@1890
   551
     * previously-entered thread is still waiting in some other
jaroslav@1890
   552
     * slot.  So we scan to check for any.  This is almost always
jaroslav@1890
   553
     * overkill, but decreases the likelihood of timeouts when there
jaroslav@1890
   554
     * are other threads present to far less than that in lock-based
jaroslav@1890
   555
     * exchangers in which earlier-arriving threads may still be
jaroslav@1890
   556
     * waiting on entry locks.
jaroslav@1890
   557
     *
jaroslav@1890
   558
     * @param node the waiting node
jaroslav@1890
   559
     * @return another thread's item, or CANCEL
jaroslav@1890
   560
     */
jaroslav@1890
   561
    private Object scanOnTimeout(Node node) {
jaroslav@1890
   562
        Object y;
jaroslav@1890
   563
        for (int j = arena.length - 1; j >= 0; --j) {
jaroslav@1890
   564
            Slot slot = arena[j];
jaroslav@1890
   565
            if (slot != null) {
jaroslav@1890
   566
                while ((y = slot.get()) != null) {
jaroslav@1890
   567
                    if (slot.compareAndSet(y, null)) {
jaroslav@1890
   568
                        Node you = (Node)y;
jaroslav@1890
   569
                        if (you.compareAndSet(null, node.item)) {
jaroslav@1890
   570
                            LockSupport.unpark(you.waiter);
jaroslav@1890
   571
                            return you.item;
jaroslav@1890
   572
                        }
jaroslav@1890
   573
                    }
jaroslav@1890
   574
                }
jaroslav@1890
   575
            }
jaroslav@1890
   576
        }
jaroslav@1890
   577
        return CANCEL;
jaroslav@1890
   578
    }
jaroslav@1890
   579
jaroslav@1890
   580
    /**
jaroslav@1890
   581
     * Creates a new Exchanger.
jaroslav@1890
   582
     */
jaroslav@1890
   583
    public Exchanger() {
jaroslav@1890
   584
    }
jaroslav@1890
   585
jaroslav@1890
   586
    /**
jaroslav@1890
   587
     * Waits for another thread to arrive at this exchange point (unless
jaroslav@1890
   588
     * the current thread is {@linkplain Thread#interrupt interrupted}),
jaroslav@1890
   589
     * and then transfers the given object to it, receiving its object
jaroslav@1890
   590
     * in return.
jaroslav@1890
   591
     *
jaroslav@1890
   592
     * <p>If another thread is already waiting at the exchange point then
jaroslav@1890
   593
     * it is resumed for thread scheduling purposes and receives the object
jaroslav@1890
   594
     * passed in by the current thread.  The current thread returns immediately,
jaroslav@1890
   595
     * receiving the object passed to the exchange by that other thread.
jaroslav@1890
   596
     *
jaroslav@1890
   597
     * <p>If no other thread is already waiting at the exchange then the
jaroslav@1890
   598
     * current thread is disabled for thread scheduling purposes and lies
jaroslav@1890
   599
     * dormant until one of two things happens:
jaroslav@1890
   600
     * <ul>
jaroslav@1890
   601
     * <li>Some other thread enters the exchange; or
jaroslav@1890
   602
     * <li>Some other thread {@linkplain Thread#interrupt interrupts}
jaroslav@1890
   603
     * the current thread.
jaroslav@1890
   604
     * </ul>
jaroslav@1890
   605
     * <p>If the current thread:
jaroslav@1890
   606
     * <ul>
jaroslav@1890
   607
     * <li>has its interrupted status set on entry to this method; or
jaroslav@1890
   608
     * <li>is {@linkplain Thread#interrupt interrupted} while waiting
jaroslav@1890
   609
     * for the exchange,
jaroslav@1890
   610
     * </ul>
jaroslav@1890
   611
     * then {@link InterruptedException} is thrown and the current thread's
jaroslav@1890
   612
     * interrupted status is cleared.
jaroslav@1890
   613
     *
jaroslav@1890
   614
     * @param x the object to exchange
jaroslav@1890
   615
     * @return the object provided by the other thread
jaroslav@1890
   616
     * @throws InterruptedException if the current thread was
jaroslav@1890
   617
     *         interrupted while waiting
jaroslav@1890
   618
     */
jaroslav@1890
   619
    public V exchange(V x) throws InterruptedException {
jaroslav@1890
   620
        if (!Thread.interrupted()) {
jaroslav@1890
   621
            Object v = doExchange((x == null) ? NULL_ITEM : x, false, 0);
jaroslav@1890
   622
            if (v == NULL_ITEM)
jaroslav@1890
   623
                return null;
jaroslav@1890
   624
            if (v != CANCEL)
jaroslav@1890
   625
                return (V)v;
jaroslav@1890
   626
            Thread.interrupted(); // Clear interrupt status on IE throw
jaroslav@1890
   627
        }
jaroslav@1890
   628
        throw new InterruptedException();
jaroslav@1890
   629
    }
jaroslav@1890
   630
jaroslav@1890
   631
    /**
jaroslav@1890
   632
     * Waits for another thread to arrive at this exchange point (unless
jaroslav@1890
   633
     * the current thread is {@linkplain Thread#interrupt interrupted} or
jaroslav@1890
   634
     * the specified waiting time elapses), and then transfers the given
jaroslav@1890
   635
     * object to it, receiving its object in return.
jaroslav@1890
   636
     *
jaroslav@1890
   637
     * <p>If another thread is already waiting at the exchange point then
jaroslav@1890
   638
     * it is resumed for thread scheduling purposes and receives the object
jaroslav@1890
   639
     * passed in by the current thread.  The current thread returns immediately,
jaroslav@1890
   640
     * receiving the object passed to the exchange by that other thread.
jaroslav@1890
   641
     *
jaroslav@1890
   642
     * <p>If no other thread is already waiting at the exchange then the
jaroslav@1890
   643
     * current thread is disabled for thread scheduling purposes and lies
jaroslav@1890
   644
     * dormant until one of three things happens:
jaroslav@1890
   645
     * <ul>
jaroslav@1890
   646
     * <li>Some other thread enters the exchange; or
jaroslav@1890
   647
     * <li>Some other thread {@linkplain Thread#interrupt interrupts}
jaroslav@1890
   648
     * the current thread; or
jaroslav@1890
   649
     * <li>The specified waiting time elapses.
jaroslav@1890
   650
     * </ul>
jaroslav@1890
   651
     * <p>If the current thread:
jaroslav@1890
   652
     * <ul>
jaroslav@1890
   653
     * <li>has its interrupted status set on entry to this method; or
jaroslav@1890
   654
     * <li>is {@linkplain Thread#interrupt interrupted} while waiting
jaroslav@1890
   655
     * for the exchange,
jaroslav@1890
   656
     * </ul>
jaroslav@1890
   657
     * then {@link InterruptedException} is thrown and the current thread's
jaroslav@1890
   658
     * interrupted status is cleared.
jaroslav@1890
   659
     *
jaroslav@1890
   660
     * <p>If the specified waiting time elapses then {@link
jaroslav@1890
   661
     * TimeoutException} is thrown.  If the time is less than or equal
jaroslav@1890
   662
     * to zero, the method will not wait at all.
jaroslav@1890
   663
     *
jaroslav@1890
   664
     * @param x the object to exchange
jaroslav@1890
   665
     * @param timeout the maximum time to wait
jaroslav@1890
   666
     * @param unit the time unit of the <tt>timeout</tt> argument
jaroslav@1890
   667
     * @return the object provided by the other thread
jaroslav@1890
   668
     * @throws InterruptedException if the current thread was
jaroslav@1890
   669
     *         interrupted while waiting
jaroslav@1890
   670
     * @throws TimeoutException if the specified waiting time elapses
jaroslav@1890
   671
     *         before another thread enters the exchange
jaroslav@1890
   672
     */
jaroslav@1890
   673
    public V exchange(V x, long timeout, TimeUnit unit)
jaroslav@1890
   674
        throws InterruptedException, TimeoutException {
jaroslav@1890
   675
        if (!Thread.interrupted()) {
jaroslav@1890
   676
            Object v = doExchange((x == null) ? NULL_ITEM : x,
jaroslav@1890
   677
                                  true, unit.toNanos(timeout));
jaroslav@1890
   678
            if (v == NULL_ITEM)
jaroslav@1890
   679
                return null;
jaroslav@1890
   680
            if (v != CANCEL)
jaroslav@1890
   681
                return (V)v;
jaroslav@1890
   682
            if (!Thread.interrupted())
jaroslav@1890
   683
                throw new TimeoutException();
jaroslav@1890
   684
        }
jaroslav@1890
   685
        throw new InterruptedException();
jaroslav@1890
   686
    }
jaroslav@1890
   687
}