mirror of
https://github.com/thorstenMueller/Thorsten-Voice.git
synced 2024-11-24 00:43:40 +01:00
Added check if audio file exists in getDatasetSpeechRate
This commit is contained in:
parent
29238f2a31
commit
b536dfd958
@ -6,11 +6,12 @@
|
|||||||
# Changelog:
|
# Changelog:
|
||||||
# v0.1 - 26.09.2021 - Initial version
|
# v0.1 - 26.09.2021 - Initial version
|
||||||
|
|
||||||
|
from genericpath import exists
|
||||||
import os
|
import os
|
||||||
import librosa
|
import librosa
|
||||||
import csv
|
import csv
|
||||||
|
|
||||||
dataset_dir = "/Users/thorsten/Downloads/thorsten-export-20210909/" # Directory where metadata.csv is in
|
dataset_dir = "/home/thorsten/___dev/tts/dataset/Thorsten-neutral-Dec2021-44k/" # Directory where metadata.csv is in
|
||||||
out_csv_file = os.path.join(dataset_dir,"speech_rate_report.csv")
|
out_csv_file = os.path.join(dataset_dir,"speech_rate_report.csv")
|
||||||
decimal_use_comma = True # False: Splitting decimal value with a dot (.); True: Comma (,)
|
decimal_use_comma = True # False: Splitting decimal value with a dot (.); True: Comma (,)
|
||||||
|
|
||||||
@ -23,15 +24,18 @@ with open(os.path.join(dataset_dir,"metadata.csv")) as csvfile:
|
|||||||
for row in reader:
|
for row in reader:
|
||||||
wav_file = os.path.join(dataset_dir,"wavs",row[0] + ".wav")
|
wav_file = os.path.join(dataset_dir,"wavs",row[0] + ".wav")
|
||||||
|
|
||||||
# Gather values for report.csv output
|
if exists(wav_file):
|
||||||
phrase_len = len(row[1]) - 1 # Do not count punctuation marks.
|
# Gather values for report.csv output
|
||||||
duration = round(librosa.get_duration(filename=wav_file),2)
|
phrase_len = len(row[1]) - 1 # Do not count punctuation marks.
|
||||||
char_per_sec = round(phrase_len / duration,2)
|
duration = round(librosa.get_duration(filename=wav_file),2)
|
||||||
|
char_per_sec = round(phrase_len / duration,2)
|
||||||
|
|
||||||
if decimal_use_comma:
|
if decimal_use_comma:
|
||||||
duration = str(duration).replace(".",",")
|
duration = str(duration).replace(".",",")
|
||||||
char_per_sec = str(char_per_sec).replace(".",",")
|
char_per_sec = str(char_per_sec).replace(".",",")
|
||||||
|
|
||||||
out_csv.write(row[0] + ".wav;" + str(duration) + ";" + str(phrase_len) + ";" + str(char_per_sec) + ";no\n")
|
out_csv.write(row[0] + ".wav;" + str(duration) + ";" + str(phrase_len) + ";" + str(char_per_sec) + ";no\n")
|
||||||
|
else:
|
||||||
|
print("File " + wav_file + " does not exist.")
|
||||||
|
|
||||||
out_csv.close()
|
out_csv.close()
|
||||||
|
Loading…
Reference in New Issue
Block a user