Added check if audio file exists in getDatasetSpeechRate

2025-01-13 01:08:16 +01:00 · 2021-12-19 18:44:01 +01:00 · 2021-12-19 18:44:01 +01:00 · b536dfd958
commit b536dfd958
parent 29238f2a31
1 changed files with 13 additions and 9 deletions
--- a/helperScripts/getDatasetSpeechRate.py
+++ b/helperScripts/getDatasetSpeechRate.py
@@ -6,11 +6,12 @@
 # Changelog:
 # v0.1 - 26.09.2021 - Initial version

+from genericpath import exists
 import os
 import librosa
 import csv

-dataset_dir = "/Users/thorsten/Downloads/thorsten-export-20210909/" # Directory where metadata.csv is in
+dataset_dir = "/home/thorsten/___dev/tts/dataset/Thorsten-neutral-Dec2021-44k/" # Directory where metadata.csv is in
 out_csv_file = os.path.join(dataset_dir,"speech_rate_report.csv")
 decimal_use_comma = True # False: Splitting decimal value with a dot (.); True: Comma (,)

@@ -23,15 +24,18 @@ with open(os.path.join(dataset_dir,"metadata.csv")) as csvfile:
    for row in reader:
        wav_file = os.path.join(dataset_dir,"wavs",row[0] + ".wav")

-        # Gather values for report.csv output
-        phrase_len = len(row[1]) - 1 # Do not count punctuation marks.
-        duration = round(librosa.get_duration(filename=wav_file),2)
-        char_per_sec = round(phrase_len / duration,2)
+        if exists(wav_file):
+            # Gather values for report.csv output
+            phrase_len = len(row[1]) - 1 # Do not count punctuation marks.
+            duration = round(librosa.get_duration(filename=wav_file),2)
+            char_per_sec = round(phrase_len / duration,2)

-        if decimal_use_comma:
-            duration = str(duration).replace(".",",")
-            char_per_sec = str(char_per_sec).replace(".",",")
+            if decimal_use_comma:
+                duration = str(duration).replace(".",",")
+                char_per_sec = str(char_per_sec).replace(".",",")

-        out_csv.write(row[0] + ".wav;" + str(duration) + ";" + str(phrase_len) + ";" + str(char_per_sec) + ";no\n")
+            out_csv.write(row[0] + ".wav;" + str(duration) + ";" + str(phrase_len) + ";" + str(char_per_sec) + ";no\n")
+        else:
+            print("File " + wav_file + " does not exist.")

 out_csv.close()