#include <stdlib.h>
#include <stdint.h>

#include <pthread.h>

#define LOOP 1000

/* Singly linked list element; it carries no payload, only the link. */
struct node {
	/* Following element in the list. */
	struct node *next;
};

/* Start line shared by main() and both worker threads. */
static pthread_barrier_t barrier;

/* Sentinel marking the bottom of the stack; its link points back at itself. */
static struct node the_terminal_node = {
	.next = &the_terminal_node,
};

/* Stack head: ->next is the current top, initially the sentinel (empty). */
static struct node pending_stack = {
	.next = &the_terminal_node,
};

/*
 * Hooks that are declared here but not defined in this file; presumably they
 * are supplied by the ThreadSanitizer runtime when building with
 * -fsanitize=thread (confirm against the toolchain in use).
 */
extern void __tsan_acquire(void *addr);
extern void __tsan_release(void *addr);

/*
 * Declared with (void): before C23 an empty parameter list leaves the
 * arguments unchecked instead of declaring a zero-argument function.
 */
extern void __tsan_ignore_thread_begin(void);
extern void __tsan_ignore_thread_end(void);

/*
 * Producer thread body: waits on the start barrier, then pushes LOOP freshly
 * allocated nodes onto pending_stack.
 *
 * A push happens in two steps. The new node is first swapped in as the stack
 * head while its ->next is still NULL (from calloc()); only afterwards is
 * ->next set to the previous head. A reader that sees a NULL link therefore
 * knows the push is still in flight.
 *
 * The argument is unused. Always returns NULL; aborts the process if a node
 * cannot be allocated.
 */
static void *push_worker(void *nil)
{
	(void) nil;

	pthread_barrier_wait(&barrier);

	for (size_t k = 0; k < LOOP; ++k) {
		struct node *new_node, *old_node;

		new_node = calloc(1, sizeof *new_node);
		if (!new_node) {
			/*
			 * A NULL head would be dereferenced by both threads, and
			 * skipping the push would leave the consumer waiting for
			 * a node that never arrives.
			 */
			abort();
		}

		old_node = __atomic_exchange_n(&pending_stack.next,
					       new_node,
					       __ATOMIC_SEQ_CST);

		/*
		 * RELEASE rather than RELAXED: the consumer frees the node as
		 * soon as it observes a non-NULL link, so this store has to
		 * be ordered before that free(). A relaxed store provides no
		 * such ordering and leaves the store and the free() racing.
		 */
		__atomic_store_n(&new_node->next, old_node, __ATOMIC_RELEASE);
	}

	return NULL;
}

/*
 * Consumer thread body: waits on the start barrier, then repeatedly detaches
 * the whole pending_stack (replacing it with the terminal sentinel) and frees
 * every node of the detached chain until LOOP nodes have been released.
 *
 * The argument is unused. Always returns NULL.
 */
static void *pop_worker(void *nil)
{
	(void) nil;

	size_t k = 0;

	pthread_barrier_wait(&barrier);

	while (k < LOOP) {
		struct node *current_stack;

		/* Take the entire chain in one step, leaving the stack empty. */
		current_stack = __atomic_exchange_n(&pending_stack.next,
						    &the_terminal_node,
						    __ATOMIC_SEQ_CST);

		while (current_stack != &the_terminal_node) {
			struct node *next_node;

			/*
			 * A NULL link means the pusher has published the node
			 * but not yet linked it to its predecessor; spin until
			 * the link shows up.
			 *
			 * ACQUIRE rather than CONSUME: the free() below does
			 * not depend on the loaded value, so dependency
			 * ordering alone would not order it after the pusher's
			 * store to this node.
			 */
			do {
				next_node = __atomic_load_n(&current_stack->next,
							    __ATOMIC_ACQUIRE);
			} while (!next_node);

			free(current_stack);
			current_stack = next_node;
			++k;
		}
	}

	return NULL;
}

int main(void)
{
	pthread_t ths[2];

	pthread_barrier_init(&barrier, NULL, 3);

	pthread_create(&ths[0], NULL, push_worker, NULL);
	pthread_create(&ths[1], NULL, pop_worker, NULL);

	pthread_barrier_wait(&barrier);

	pthread_join(ths[0], NULL);
	pthread_join(ths[1], NULL);

	return 0;
}
