... | ... | @@ -447,49 +447,67 @@ def _backsolve(matrix_XTX, matrix_XTY): |
|
|
|
|
|
# Statistics
|
|
|
|
|
|
Suppose we need to compute some grade statistics for the following data.
|
|
|
|
|
|
```python
|
|
|
#---------------------------------------------------------------------------
|
|
|
# Data
|
|
|
#---------------------------------------------------------------------------
|
|
|
| Name | Homework 1 | Homework 2 | Exam 1 | Exam 2 |
|
|
|
| :------- | ----: | ----: | ----: | ----: |
|
|
|
| John | 100 | 98 | 100 | 90 |
|
|
|
| Tom | 100 | 0 | 70 | 90 |
|
|
|
| Bob | 100 | 70 | 90 | 80 |
|
|
|
|
|
|
The first step is to convert the table into...
|
|
|
|
|
|
- a list of exercises
|
|
|
|
|
|
```python
|
|
|
exercises = ["Homework 1", "Homework 2", "Exam 1", "Exam 2"]
|
|
|
```
|
|
|
|
|
|
- a list of names
|
|
|
|
|
|
```python
|
|
|
students = ["John", "Tom", "Bob"]
|
|
|
```
|
|
|
|
|
|
- a NumPy `ndarray`
|
|
|
|
|
|
```python
|
|
|
grades = np.array([[100., 98, 100., 90.],
|
|
|
[100., 0., 70., 90.],
|
|
|
[100., 70., 90., 80.]])
|
|
|
```
|
|
|
|
|
|
NumPy provides a number of statistical operations that can be applied along an axis.
|
|
|
For a two-dimensional `ndarray` there are two choices:
|
|
|
|
|
|
- by row (student), using `axis=1`
|
|
|
|
|
|
#---------------------------------------------------------------------------
|
|
|
# Statistics by Student
|
|
|
#---------------------------------------------------------------------------
|
|
|
```python
|
|
|
avg_by_student = grades.mean(axis=1)
|
|
|
min_by_student = grades.min(axis=1)
|
|
|
max_by_student = grades.max(axis=1)
|
|
|
```
|
|
|
|
|
|
print("| {:^8} | {:^4} | {:^4} | {:^5} |".format("Name", "Avg", "Min", "Max"))
|
|
|
print("|:---------|-----:|-----:|------:|")
|
|
|
|
|
|
grade_data = zip(students, avg_by_student, min_by_student, max_by_student)
|
|
|
|
|
|
for name, avg, g_min, g_max in grade_data:
|
|
|
print(f"| {name:<8} | {avg:>4.1f} | {g_min:>4.1f} | {g_max:>5.1f} |")
|
|
|
| Name | Avg | Min | Max |
|
|
|
|:---------|-----:|-----:|------:|
|
|
|
| John | 97.0 | 90.0 | 100.0 |
|
|
|
| Tom | 65.0 | 0.0 | 100.0 |
|
|
|
| Bob | 85.0 | 70.0 | 100.0 |
|
|
|
|
|
|
print()
|
|
|
- by column (exercise), using `axis=0`
|
|
|
|
|
|
#---------------------------------------------------------------------------
|
|
|
# Statistics by Exercise
|
|
|
#---------------------------------------------------------------------------
|
|
|
```python
|
|
|
avg_by_exercise = grades.mean(axis=0)
|
|
|
max_by_exercise = grades.max(axis=0)
|
|
|
std_by_exercise = grades.std(axis=0)
|
|
|
```
|
|
|
|
|
|
print("| {:^12} | {:^5} | {:^5} | {:^8} |".format("Exercise", "Avg", "Max", "Std Dev"))
|
|
|
print("|:-------------|------:|------:|---------:|")
|
|
|
|
|
|
grade_data = zip(exercises, avg_by_exercise, max_by_exercise, std_by_exercise)
|
|
|
| Exercise | Avg | Max | Std Dev |
|
|
|
|:-------------|------:|------:|---------:|
|
|
|
| Homework 1 | 100.0 | 100.0 | 0.0 |
|
|
|
| Homework 2 | 56.0 | 98.0 | 41.2 |
|
|
|
| Exam 1 | 86.7 | 100.0 | 12.5 |
|
|
|
| Exam 2 | 86.7 | 90.0 | 4.7 |
|
|
|
|
|
|
for exercise, avg, g_min, g_max in grade_data:
|
|
|
print(f"| {exercise:<12} | {avg:>5.1f} | {g_min:>5.1f} | {g_max:>8.1f} |")
|
|
|
```
|
|
|
|
|
|
|