from random import uniform
from random import randint

import numpy as np
import matplotlib.pyplot as plt

# Reference:
# Weighted random sampling with a reservoir; Pavlos S.Efraimidis; Paul G. Spirakis (2006)

#----------------------------------------------------------------------------------------#
# Define a weighting function (for example x**2)

def weight_function(x):
return x**2

#----------------------------------------------------------------------------------------#
# Input: a population V of n weighted items

n = 100000

v_items = []
v_weight = []
for i in range(n):
x = randint(1,100)
v_items.append(x)
v_weight.append(weight_function(x))

plt.hist(v_items, color="lightblue")

plt.grid()

plt.title('Weighted random sampling with a reservoir \n (initial population 1/2)')
plt.ylabel('Counts')
plt.xlim(0,100)

plt.savefig("population.png", bbox_inches='tight')
plt.show()

#----------------------------------------------------------------------------------------#
# Weighted Random Sampling

m = 1000 # m < n (sampling size)

reservoir = np.zeros((m,2))

# Step 1, 2

ui = uniform(0.0,1.0)

reservoir[0,0] = v_items[0]
reservoir[0,1] = ui**(1.0 / v_weight[0])

ki_min = reservoir[0,1]

for i in range(1,m):
ui = uniform(0.0,1.0)
reservoir[i,0] = v_items[i]
reservoir[i,1] = ui**(1.0/v_weight[i])
if reservoir[0,1] < ki_min: ki_min = reservoir[0,1]

# Step 3,4,5,6,7

for i in range(m,n):
ui = uniform(0.0,1.0)
ki = ui**(1.0/v_weight[i])
ki_min_idx = np.argmin(reservoir[:,1])
if ki > ki_min:
reservoir[ki_min_idx,0] = v_items[i]
reservoir[ki_min_idx,1] = ki
ki_min_idx = np.argmin(reservoir[:,1])
ki_min = reservoir[ki_min_idx,1]

#----------------------------------------------------------------------------------------#
# Plot Results

plt.hist(reservoir[:,0], color="red")

plt.grid()

plt.title('Weighted random sampling with a reservoir \n (Sampling 2/2)')
plt.ylabel('Counts')
plt.xlim(0,100)

plt.savefig("sampling.png", bbox_inches='tight')
plt.show()


#### Infos

Main author / Auteur principal:
Bazinga

Created / Créé: 2017-05-12

Edited / Modifié: 2017-05-12