1> /* Statistics calculated using SQL: Mode and median */
2> /* */
3> use pubs2
1> /* */
2> /* Mode of price in titles table: */
3> /* */
4> /* 1. List prices in order so we can see what we have: */
5> /* */
6> select title_id, price
7> from titles
8> where price is not null
9> order by price
title_id price
-------- ------------------------
BU2075 2.99
MC3021 2.99
PS2106 7.00
PS7777 7.99
PS2091 10.95
BU1111 11.95
TC4203 11.95
TC7777 14.99
BU1032 19.99
BU7832 19.99
MC2222 19.99
PS3333 19.99
PC8888 20.00
TC3218 20.95
PS1372 21.59
PC1035 22.95
(16 rows affected)
1> /* */
2> /* 2. Find count (= frequency) of each price: */
3> /* */
4> select price, count(*) frequency
5> from titles
6> where price is not null
7> group by price
8> order by frequency
price frequency
------------------------ -----------
7.00 1
7.99 1
10.95 1
14.99 1
20.00 1
20.95 1
21.59 1
22.95 1
2.99 2
11.95 2
19.99 4
(11 rows affected)
1> /* */
2> /* 3. Find price with highest frequency, i.e., mode: */
3> /* */
4> select price, count(*) frequency
5> from titles
6> where price is not null
7> group by price
8> having count(*) >= all
9> (select count(*)
10> from titles
11> group by price)
price frequency
------------------------ -----------
19.99 4
(1 row affected)
1> /* */
2> /* Median price - Celko's first median, but separate views. */
3> /* Use separate views - in my database, not in pubs2 */
4> /* */
5> use evelyn
1> /* */
2> /* Smallest value in upper half: */
3> /* */
4> create view small
5> as
6> select min(price) MinTopHalf
7> from pubs2..titles
8> where price is not null
9> and price in
10> (select t1.price
11> from pubs2..titles t1, pubs2..titles t2
12> where t2.price >= t1.price
13> and t1.price is not null
14> group by t1.price
15> having count(t1.price) <=
16> (select ceiling(count(*) / 2.0)
17> from pubs2..titles))
1> /* */
2> select * from small
MinTopHalf
------------------------
14.99
(1 row affected)
1> /* */
2> /* Highest value in lower half: */
3> /* */
4> create view big
5> as
6> select max(price) MaxLowHalf
7> from pubs2..titles
8> where price is not null
9> and price in
10> (select t1.price
11> from pubs2..titles t1, pubs2..titles t2
12> where t2.price <= t1.price
13> and t1.price is not null
14> group by t1.price
15> having count(t1.price) <=
16> (select ceiling(count(*) / 2.0)
17> from pubs2..titles))
1> /* */
2> select * from big
MaxLowHalf
------------------------
14.99
(1 row affected)
1> /* */
2> /* Average the two values: */
3> /* */
4> select convert(numeric(6,2), ((MinTopHalf + MaxLowHalf) / 2.0))
5> from small, big
---------
14.99
(1 row affected)
1> /* */
2> /* Drop views so I can repeat this exercise. */
3> drop view small
1> drop view big
1> /* */
2> /* Median price - using Murchison's median, Celko, p.357 */
3> /* (Back to pubs2; can check result against sorted prices.) */
4> /* */
5> use pubs2
1> /* */
2> select avg(price)
3> from titles t1
4> where exists
5> (select count(*)
6> from titles t2
7> where t2.price >= t1.price
8> having count(*) =
9> (select floor(count(*) / 2.0)
10> from titles)
11> or count(*) =
12> (select ceiling(count(*) / 2.0)
13> from titles))
------------------------
14.99
(1 row affected)
1> /* */
2> dump tran evelyn with truncate_only