from random import uniform
from random import randint

import numpy as np
import matplotlib.pyplot as plt

# Reference: 
# Weighted random sampling with a reservoir; Pavlos S.Efraimidis; Paul G. Spirakis (2006)

#----------------------------------------------------------------------------------------#
# Define a weighting function (for example x**2)

def weight_function(x):
	return x**2

#----------------------------------------------------------------------------------------#
# Input: a population V of n weighted items

n = 100000

v_items = []
v_weight = []
for i in range(n):
	x = randint(1,100)
	v_items.append(x)
	v_weight.append(weight_function(x))

plt.hist(v_items, color="lightblue")

plt.grid()

plt.title('Weighted random sampling with a reservoir \n (initial population 1/2)')
plt.ylabel('Counts')
plt.xlim(0,100)

plt.savefig("population.png", bbox_inches='tight')
plt.show()

#----------------------------------------------------------------------------------------#
# Weighted Random Sampling 

m = 1000 # m < n (sampling size)

reservoir = np.zeros((m,2))

# Step 1, 2

ui = uniform(0.0,1.0)

reservoir[0,0] = v_items[0] 
reservoir[0,1] = ui**(1.0 / v_weight[0])

ki_min = reservoir[0,1]

for i in range(1,m):
	ui = uniform(0.0,1.0)
	reservoir[i,0] = v_items[i]
	reservoir[i,1] = ui**(1.0/v_weight[i])
	if reservoir[0,1] < ki_min: ki_min = reservoir[0,1]

# Step 3,4,5,6,7

for i in range(m,n):
	ui = uniform(0.0,1.0)
	ki = ui**(1.0/v_weight[i])
	ki_min_idx = np.argmin(reservoir[:,1])
	if ki > ki_min:
		reservoir[ki_min_idx,0] = v_items[i]
		reservoir[ki_min_idx,1] = ki
		ki_min_idx = np.argmin(reservoir[:,1])
		ki_min = reservoir[ki_min_idx,1]
		
#----------------------------------------------------------------------------------------#
# Plot Results

plt.hist(reservoir[:,0], color="red")

plt.grid()

plt.title('Weighted random sampling with a reservoir \n (Sampling 2/2)')
plt.ylabel('Counts')
plt.xlim(0,100)

plt.savefig("sampling.png", bbox_inches='tight')
plt.show()

About / Au sujet de

Created:
12 mai 2017 20:25:05

Updated:
12 mai 2017 20:25:05

License / Licence

MIT License

Abstract / Résumé