Find each probability interval for each user, then use RAND()
to find you weighted pick.
DECLARE @Random FLOAT = RAND()
;WITH Odds AS
(
SELECT
V.*
FROM
(VALUES
('A', 0.1),
('B', 0.3),
('C', 0.4),
('D', 0.2)
) V (UserCode, Percentage)
),
OddIntervals AS
(
SELECT
O.*,
OddStart = SUM(O.Percentage) OVER (ORDER BY O.UserCode) - O.Percentage,
OddsEnd = SUM(O.Percentage) OVER (ORDER BY O.UserCode)
FROM
Odds AS O
)
SELECT
O.*
FROM
OddIntervals AS O
WHERE
@Random > O.OddStart AND
@Random <= O.OddsEnd
OddsIntervals
looks like the following:
UserCode Percentage OddStart OddsEnd
A 0.1 0.0 0.1
B 0.3 0.1 0.4
C 0.4 0.4 0.8
D 0.2 0.8 1.0
RAND() doesn't return 1, so the last interval might get a little disadvantage.
If you need to do this many times, you can use the following script to generate an N amount random numbers (took 4 seconds to generate 100k values for me). You have to provide different seed on each row to get different RAND()
result so I used NEWID()
as seed. You can simply replace the #Odds table with yours to execute.
DECLARE @AmountRandomValues INT = 100000
IF OBJECT_ID('tempdb..#RandomValues') IS NOT NULL
DROP TABLE #RandomValues
SELECT TOP (@AmountRandomValues)
RandomValue = RAND(CONVERT(VARBINARY, NEWID()))
INTO
#RandomValues
FROM
sys.columns AS s1
CROSS JOIN sys.columns AS s2
IF OBJECT_ID('tempdb..#Odds') IS NOT NULL
DROP TABLE #Odds
CREATE TABLE #Odds (
UserCode CHAR(1),
Percentage DECIMAL(3,2))
INSERT INTO #Odds (
UserCode,
Percentage)
VALUES
('A', 0.1),
('B', 0.3),
('C', 0.4),
('D', 0.2)
;WITH OddIntervals AS
(
SELECT
O.*,
OddStart = SUM(O.Percentage) OVER (ORDER BY O.UserCode) - O.Percentage,
OddsEnd = SUM(O.Percentage) OVER (ORDER BY O.UserCode)
FROM
#Odds AS O
)
SELECT
R.RandomValue,
O.*
FROM
#RandomValues AS R
INNER JOIN OddIntervals AS O ON
R.RandomValue > O.OddStart AND
R.RandomValue <= O.OddsEnd
For 100k values, the amount of user picks for a few runs as example were the following:
First run:
UserCode Amount
A 10222
B 29883
C 39738
D 20157
Second run:
UserCode Amount
A 10064
B 29794
C 40061
D 20081
Third run:
UserCode Amount
A 10030
B 29960
C 40261
D 19749
You can see this is pretty consistent to their odds.