voice vignette

version 0.4.16

Filipe J. Zabala

2022-09-15

0. Installation

0.1 Minimal

# CRAN (stable)
# `dependencies` is spelled out in full rather than relying on partial
# argument matching of `dep`.
install.packages('voice', dependencies = TRUE)

# GitHub (development)
devtools::install_github('filipezabala/voice')

0.2 Full

For the full installation instructions, see https://github.com/filipezabala/voice

1. Extract features

1.1 Get path to audio files

# Full paths of the sample .wav files under the 'extdata' folder of the
# wrassp package.
# `pattern` is passed as a named argument with `=`; writing `pattern <- ...`
# inside the call would also create a stray variable named `pattern` in the
# calling environment and only work by positional coincidence.
wavFiles <- list.files(system.file('extdata', package = 'wrassp'),
                       pattern = glob2rx('*.wav'), full.names = TRUE)

1.2 Minimal usage

# Extract features from the files in wavFiles, leaving every other argument
# at its default.
M <- voice::extract_features(wavFiles)
# Auto-print the result; the recorded output follows.
M
#> # A tibble: 2,389 × 61
#>    slice…¹ slice…² wav_p…³    f0    f1    f2    f3    f4    f5    f6    f7    f8
#>      <int>   <int> <chr>   <dbl> <int> <int> <int> <int> <int> <int> <int> <int>
#>  1       1       1 /usr/l…    NA    NA  1863    NA  3087  4218  5233  6144  6643
#>  2       2       2 /usr/l…    NA    NA  1863    NA  3179  4172  5259  6712    NA
#>  3       3       3 /usr/l…    NA    NA  1933  3055  3731  4663  5658  6775    NA
#>  4       4       4 /usr/l…    NA    NA  1777  2791  3712  4690  5657  6602  7771
#>  5       5       5 /usr/l…    NA    NA  1710  2690  3536  4677  5653  6526    NA
#>  6       6       6 /usr/l…    NA    NA  1794  2673  3560  4402  5119  6499    NA
#>  7       7       7 /usr/l…    NA    NA  1873  2665  3640  4346  5164  6516    NA
#>  8       8       8 /usr/l…    NA    NA  1932  2684  3550  4304  5190  6518    NA
#>  9       9       9 /usr/l…    NA   148  1960  2648  3248  3640  5214  6498  7668
#> 10      10      10 /usr/l…    NA   202  1965  2650  3284  3664  5210  6497  7716
#> # … with 2,379 more rows, 49 more variables: mfcc1 <dbl>, mfcc2 <dbl>,
#> #   mfcc3 <dbl>, mfcc4 <dbl>, mfcc5 <dbl>, mfcc6 <dbl>, mfcc7 <dbl>,
#> #   mfcc8 <dbl>, mfcc9 <dbl>, mfcc10 <dbl>, mfcc11 <dbl>, mfcc12 <dbl>,
#> #   df2 <dbl>, df3 <dbl>, df4 <dbl>, df5 <dbl>, df6 <dbl>, df7 <dbl>,
#> #   df8 <dbl>, pf1 <dbl>, pf2 <dbl>, pf3 <dbl>, pf4 <dbl>, pf5 <dbl>,
#> #   pf6 <dbl>, pf7 <dbl>, pf8 <dbl>, rf1 <dbl>, rf2 <dbl>, rf3 <dbl>,
#> #   rf4 <dbl>, rf5 <dbl>, rf6 <dbl>, rf7 <dbl>, rf8 <dbl>, rcf2 <dbl>, …

2. Tag

2.1 Creating synthetic data

# Toy metadata: subject ids 1, 2 and 3, each repeated three times, paired
# with the audio paths in wavFiles.
E <- dplyr::tibble(
  subject_id = rep(c(1, 2, 3), each = 3),
  wav_path = wavFiles
)
E
#> # A tibble: 9 × 2
#>   subject_id wav_path                                               
#>        <dbl> <chr>                                                  
#> 1          1 /usr/local/lib/R/site-library/wrassp/extdata/lbo001.wav
#> 2          1 /usr/local/lib/R/site-library/wrassp/extdata/lbo002.wav
#> 3          1 /usr/local/lib/R/site-library/wrassp/extdata/lbo003.wav
#> 4          2 /usr/local/lib/R/site-library/wrassp/extdata/lbo004.wav
#> 5          2 /usr/local/lib/R/site-library/wrassp/extdata/lbo005.wav
#> 6          2 /usr/local/lib/R/site-library/wrassp/extdata/lbo006.wav
#> 7          3 /usr/local/lib/R/site-library/wrassp/extdata/lbo007.wav
#> 8          3 /usr/local/lib/R/site-library/wrassp/extdata/lbo008.wav
#> 9          3 /usr/local/lib/R/site-library/wrassp/extdata/lbo009.wav

2.2 Extended data

# Tag E with f0 summary columns; the printed result below has one row per
# wav_path.
voice::tag(E)
#> # A tibble: 9 × 7
#>   wav_path                       f0_ta…¹ f0_ta…² f0_ta…³ f0_ta…⁴ f0_ta…⁵ f0_ta…⁶
#>   <chr>                            <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
#> 1 /usr/local/lib/R/site-library…    85.4    17.6   0.206    76.1    29.4    7.53
#> 2 /usr/local/lib/R/site-library…    85.4    15.6   0.183    80.1    27.8   14.4 
#> 3 /usr/local/lib/R/site-library…    84.6    13.0   0.154    78.8    23.9   14.0 
#> 4 /usr/local/lib/R/site-library…    84.8    14.5   0.171    79.1    28.1   11.9 
#> 5 /usr/local/lib/R/site-library…    86.0    14.7   0.170    78.7    30.0   11.0 
#> 6 /usr/local/lib/R/site-library…    82.9    15.6   0.188    74.8    23.8    4.78
#> 7 /usr/local/lib/R/site-library…    78.2    16.2   0.207    73.5    13.4    6.82
#> 8 /usr/local/lib/R/site-library…    84.5    14.5   0.172    78.1    17.8    8.95
#> 9 /usr/local/lib/R/site-library…    81.0    12.2   0.151    75.9    23.1    9.14
#> # … with abbreviated variable names ¹​f0_tag_mean, ²​f0_tag_sd, ³​f0_tag_vc,
#> #   ⁴​f0_tag_median, ⁵​f0_tag_iqr, ⁶​f0_tag_mad

2.3 Canonical data

# Same tagging, grouped by subject_id; the printed result below has one row
# per subject.
voice::tag(E, groupBy = 'subject_id')
#> # A tibble: 3 × 7
#>   subject_id f0_tag_mean f0_tag_sd f0_tag_vc f0_tag_median f0_tag_iqr f0_tag_mad
#>        <dbl>       <dbl>     <dbl>     <dbl>         <dbl>      <dbl>      <dbl>
#> 1          1        85.1      15.3     0.180          78.3       26.8      11.9 
#> 2          2        84.6      14.9     0.176          76.4       28.3       7.97
#> 3          3        81.0      14.6     0.180          75.6       21.6       8.68

3. Voice2Sheet (experimental)

3.1 Get audio

# Remote location of the sample recording.
url0 <- 'https://github.com/filipezabala/voiceAudios/blob/main/mp3/doremi.mp3?raw=true'

# Save a local copy as doremi.mp3 in the session temp dir (binary mode).
mp3_dest <- file.path(tempdir(), 'doremi.mp3')
download.file(url0, mp3_dest, mode = 'wb')

# Embed an audio player pointing at the remote file.
embedr::embed_audio(url0)

3.2 Convert mp3 to wav mono

# Convert every mp3 in the session temp dir to a mono (-ac 1) wav via ffmpeg.
# The directory is shell-quoted so temp paths containing spaces or shell
# metacharacters are passed intact, and '&&' prevents the loop from running
# in the wrong directory if 'cd' fails.
# The command uses POSIX shell syntax and needs ffmpeg on the PATH.
cmd <- paste0('cd ', shQuote(tempdir(), type = 'sh'), ' &&',
' for i in *.[Mm][Pp]3; do ffmpeg -i "$i" -ac 1 "./${i%.*}.wav"; done')
system(cmd)

3.3 Extract F0

# Extract only the 'f0' feature, pointing extract_features() at the session
# temp dir (presumably it picks up the wav written in step 3.2 - confirm).
M <- voice::extract_features(tempdir(), features = 'f0')
# Per-column summary; the recorded output follows.
summary(M)
#>    slice_seq      slice_seq_file     wav_path               f0       
#>  Min.   :   1.0   Min.   :   1.0   Length:1179        Min.   :120.2  
#>  1st Qu.: 295.5   1st Qu.: 295.5   Class :character   1st Qu.:149.4  
#>  Median : 590.0   Median : 590.0   Mode  :character   Median :206.0  
#>  Mean   : 590.0   Mean   : 590.0                      Mean   :208.9  
#>  3rd Qu.: 884.5   3rd Qu.: 884.5                      3rd Qu.:259.5  
#>  Max.   :1179.0   Max.   :1179.0                      Max.   :352.3  
#>                                                       NA's   :223

3.4 Plot

# Plot the f0 track against its index.
plot(M$f0)

# Annotate the plot with one borderless legend box per note label. The
# coordinates are the hard-coded positions of each label in plot units.
invisible(Map(
  function(x, y, label) legend(x, y, label, bty = 'n'),
  c(-60, 80, 220, 350, 480, 600, 720, 910),
  c(170, 190, 290, 210, 250, 270, 310, 320),
  c('Do (C3)', 'Re (D3)', 'Mi (E4)', 'Fa (F3)',
    'Sol (G3)', 'La (A3)', 'Si (B3)', 'Do (C4)')
))

3.5 Assign notes

# For each note, take the median f0 over a fixed index window of M$f0
# (windows presumably chosen by eye from the plot - confirm), ignoring NAs,
# then map that frequency to a note name with voice::notes().
# The outer parentheses around each assignment print the assigned value.
(C3 <- median(M$f0[1:150], na.rm = TRUE))
#> [1] 129.7059
voice::notes(C3)
#> [1] C3
#> 108 Levels: C0 < C#0 < D0 < D#0 < E0 < F0 < F#0 < G0 < G#0 < A0 < A#0 < ... < B8
(D3 <- median(M$f0[190:280], na.rm = TRUE))
#> [1] 143.2753
voice::notes(D3)
#> [1] D3
#> 108 Levels: C0 < C#0 < D0 < D#0 < E0 < F0 < F#0 < G0 < G#0 < A0 < A#0 < ... < B8
(E4 <- median(M$f0[310:380], na.rm = TRUE))
#> [1] 320.6903
voice::notes(E4)
#> [1] E4
#> 108 Levels: C0 < C#0 < D0 < D#0 < E0 < F0 < F#0 < G0 < G#0 < A0 < A#0 < ... < B8
(F3 <- median(M$f0[420:500], na.rm = TRUE))
#> [1] 170.1356
voice::notes(F3)
#> [1] F3
#> 108 Levels: C0 < C#0 < D0 < D#0 < E0 < F0 < F#0 < G0 < G#0 < A0 < A#0 < ... < B8
(G3 <- median(M$f0[590:630], na.rm = TRUE))
#> [1] 195.0839
voice::notes(G3)
#> [1] G3
#> 108 Levels: C0 < C#0 < D0 < D#0 < E0 < F0 < F#0 < G0 < G#0 < A0 < A#0 < ... < B8
(A3 <- median(M$f0[700:770], na.rm = TRUE))
#> [1] 220.4526
voice::notes(A3)
#> [1] A3
#> 108 Levels: C0 < C#0 < D0 < D#0 < E0 < F0 < F#0 < G0 < G#0 < A0 < A#0 < ... < B8
(B3 <- median(M$f0[820:900], na.rm = TRUE))
#> [1] 249.4395
voice::notes(B3)
#> [1] B3
#> 108 Levels: C0 < C#0 < D0 < D#0 < E0 < F0 < F#0 < G0 < G#0 < A0 < A#0 < ... < B8
(C4 <- median(M$f0[950:1100], na.rm = TRUE))
#> [1] 261.6508
voice::notes(C4)
#> [1] C4
#> 108 Levels: C0 < C#0 < D0 < D#0 < E0 < F0 < F#0 < G0 < G#0 < A0 < A#0 < ... < B8

3.6 Music sheet

# Collect the note frequencies (C4 is passed twice) and convert them to note
# labels.
f0_spn <- voice::notes(c(C3, D3, E4, F3, G3, A3, B3, C4, C4))

# Duration table for the note sequence, printed below.
dur <- voice::duration(f0_spn)
dur
#>   note dur_line dur_ms  dur_prop
#> 1   C3        1      5 0.1111111
#> 2   D3        1      5 0.1111111
#> 3   E4        1      5 0.1111111
#> 4   F3        1      5 0.1111111
#> 5   G3        1      5 0.1111111
#> 6   A3        1      5 0.1111111
#> 7   B3        1      5 0.1111111
#> 8   C4        2     10 0.2222222

# Build the score with gm (by Renfei Mao): 4/4 meter, a single line whose
# pitches and durations come from the `dur` table, and tempo 100.
m <- gm::Music() +
  gm::Meter(4, 4) +
  gm::Line(
    pitches = as.list(as.character(dur$note)),
    durations = as.list(dur$dur_line)
  ) +
  gm::Tempo(100)

# Render both the score and the audio.
gm::show(m, to = c('score', 'audio'))