```r
# simple regression
> pvals
covariate cond_diff_pval
1: quality_mean 0.95309025
2: quality_count 0.91390702
3: quality_sum 0.92131203
4: active_days 0.41823554
5: n_retweet 0.93206499
6: n_tweet 0.40275545
7: followers_count 0.98859882
8: friends_count 0.09262928
9: favourites_count 0.64649878
10: statuses_count 0.82920599
11: friend_follow_ratio 0.33278385
12: days_since_create 0.63221776
13: quality_count_yhat 0.57180324
14: quality_mean_yhat 0.80773586
15: quality_sum_yhat 0.77714149
# account for blocking with difference in means
> pvals_adjust
covariate adjusted_pval
1: quality_mean 0.76547210
2: quality_count 0.85526273
3: quality_sum 0.82473805
4: active_days 0.02572512
5: n_retweet 0.58358232
6: n_tweet 0.22982856
7: followers_count 0.82938058
8: friends_count 0.07063480
9: favourites_count 0.49645500
10: statuses_count 0.65824120
11: friend_follow_ratio 0.13966778
12: days_since_create 0.37234025
13: quality_count_yhat 0.45122588
14: quality_mean_yhat 0.68883702
15: quality_sum_yhat 0.84189259
```
```r
# sort rows by the ranking features and assign each row a rank, then divide into two groups based on that rank
> dt7 <- dt7[order(quality_count_yhat, quality_count, active_days), ]
> dt7[, group_rank := 1:.N]
> round(cor(dt7), 2)
block quality_count quality_count_yhat active_days group_rank
block 1.00 0.00 0.01 0.02 -0.02
quality_count 0.00 1.00 0.65 0.81 0.59
quality_count_yhat 0.01 0.65 1.00 0.54 0.87
active_days 0.02 0.81 0.54 1.00 0.54
group_rank -0.02 0.59 0.87 0.54 1.00
```
![[210930-182639_matched_32977_yhat_condition_group.png]]