Browse Source

The bug regarding novoapi for Python 3.6 has been solved. Details can be found in novoapi_for_python3x/readme

master
yemaozi88 3 years ago
parent
commit
de5c9cecb9
  1. BIN
      .vs/acoustic_model/v15/.suo
  2. 13152
      HCompV.scp
  3. 2
      acoustic_model.sln
  4. BIN
      acoustic_model/__pycache__/defaultfiles.cpython-36.pyc
  5. 3
      acoustic_model/acoustic_model.pyproj
  6. 79
      acoustic_model/check_novoapi.py
  7. 0
      acoustic_model/convert_xsampa2ipa_readme.txt
  8. 9
      acoustic_model/novoapi_functions.py
  9. 1600
      acoustic_model/script.txt
  10. 0
      novoapi_for_python3x/__init__.py
  11. 0
      novoapi_for_python3x/asr/__init__.py
  12. 0
      novoapi_for_python3x/asr/segments/__init__.py
  13. 10
      novoapi_for_python3x/asr/segments/praat.py
  14. 0
      novoapi_for_python3x/asr/segments/segments.py
  15. 0
      novoapi_for_python3x/asr/spraaklab/__init__.py
  16. 2
      novoapi_for_python3x/asr/spraaklab/schema.py
  17. 0
      novoapi_for_python3x/backend/__init__.py
  18. 3
      novoapi_for_python3x/backend/session.py
  19. 64
      novoapi_for_python3x/readme
  20. 0
      novoapi_for_python3x/utils/json/__init__.py
  21. 119
      reus-test/reus-test.py
  22. BIN
      reus-test/reus1008-reus.wav
  23. BIN
      reus-test/reus1167-man.wav
  24. BIN
      reus-test/reus3768-mantsje.wav

BIN
.vs/acoustic_model/v15/.suo

13152
HCompV.scp
File diff suppressed because it is too large
View File

2
acoustic_model.sln

@ -10,7 +10,6 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution
..\forced_alignment\forced_alignment\__init__.py = ..\forced_alignment\forced_alignment\__init__.py
..\forced_alignment\forced_alignment\convert_phone_set.py = ..\forced_alignment\forced_alignment\convert_phone_set.py
..\toolbox\evaluation.py = ..\toolbox\evaluation.py
..\forced_alignment\forced_alignment\forced_alignment.pyproj = ..\forced_alignment\forced_alignment\forced_alignment.pyproj
..\forced_alignment\forced_alignment\htk_dict.py = ..\forced_alignment\forced_alignment\htk_dict.py
..\forced_alignment\forced_alignment\lexicon.py = ..\forced_alignment\forced_alignment\lexicon.py
..\forced_alignment\forced_alignment\mlf.py = ..\forced_alignment\forced_alignment\mlf.py
@ -18,6 +17,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution
..\toolbox\pyHTK.py = ..\toolbox\pyHTK.py
..\forced_alignment\forced_alignment\pyhtk.py = ..\forced_alignment\forced_alignment\pyhtk.py
..\forced_alignment\forced_alignment\scripts.py = ..\forced_alignment\forced_alignment\scripts.py
..\..\..\..\..\Python36-32\Lib\site-packages\novoapi\backend\session.py = ..\..\..\..\..\Python36-32\Lib\site-packages\novoapi\backend\session.py
..\forced_alignment\forced_alignment\tempfilename.py = ..\forced_alignment\forced_alignment\tempfilename.py
..\forced_alignment\forced_alignment\test_environment.py = ..\forced_alignment\forced_alignment\test_environment.py
EndProjectSection

BIN
acoustic_model/__pycache__/defaultfiles.cpython-36.pyc

3
acoustic_model/acoustic_model.pyproj

@ -4,7 +4,8 @@
<SchemaVersion>2.0</SchemaVersion>
<ProjectGuid>4d8c8573-32f0-4a62-9e62-3ce5cc680390</ProjectGuid>
<ProjectHome>.</ProjectHome>
<StartupFile>check_novoapi.py</StartupFile>
<StartupFile>
</StartupFile>
<SearchPath>
</SearchPath>
<WorkingDirectory>.</WorkingDirectory>

79
acoustic_model/check_novoapi.py

@ -3,6 +3,9 @@ os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
import sys
import csv
from collections import Counter
import random
import shutil
import numpy as np
import pandas as pd
@ -12,17 +15,18 @@ from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
import novoapi
import defaultfiles as default
sys.path.append(default.forced_alignment_module_dir)
from forced_alignment import pyhtk, convert_phone_set
import acoustic_model_functions as am_func
#import acoustic_model_functions as am_func
import convert_xsampa2ipa
import defaultfiles as default
import novoapi_functions
sys.path.append(default.accent_classification_dir)
import output_confusion_matrix
## procedure
forced_alignment_novo70 = True
balance_sample_numbers = False
## ===== load novo phoneset =====
@ -149,11 +153,12 @@ for word in word_list:
## ===== forced alignment =====
reus_dir = r'C:\OneDrive\Desktop\Reus'
if forced_alignment_novo70:
Results = pd.DataFrame(index=[],
columns=['filename', 'word', 'ipa', 'result_ipa', 'result_novo70', 'llh'])
for word in word_list:
#for word in ['Oor']:
columns=['filename', 'word', 'xsampa', 'ipa', 'result_ipa', 'result_novo70', 'llh'])
#for word in word_list:
for word in ['Reus']:
# pronunciation variants top 3
df_per_word_ = df_per_word[df_per_word['word']==word]
df_per_word_ = df_per_word_.sort_values('frequency', ascending=False)
@ -178,24 +183,41 @@ if forced_alignment_novo70:
# samples in which all pronunciations are written in novo70.
samples = df_.query("ipa in @pronunciation_ipa")
## ===== balance sample numbers =====
if balance_sample_numbers:
c = Counter(samples['ipa'])
sample_num_list = [c[key] for key in c.keys()]
sample_num = np.min(sample_num_list)
samples_balanced = pd.DataFrame(index=[], columns=list(samples.keys()))
for key in c.keys():
samples_ = samples[samples['ipa'] == key]
samples_balanced = samples_balanced.append(samples_.sample(sample_num), ignore_index = True)
samples = samples_balanced
results = pd.DataFrame(index=[],
columns=['filename', 'word', 'ipa', 'result_ipa', 'result_novo70', 'llh'])
columns=['filename', 'word', 'xsampa', 'ipa', 'result_ipa', 'result_novo70', 'llh'])
#j = 0
for i in range(0, len(samples)):
sample = samples.iloc[i]
wav_file = os.path.join(default.stimmen_wav_dir, sample['filename'])
filename = sample['filename']
wav_file = os.path.join(default.stimmen_wav_dir, filename)
if os.path.exists(wav_file):
#j += 1
#print('{0} - {1}'.format(word, i))
# for Martijn
#shutil.copy(wav_file, os.path.join(reus_dir, filename))
pronunciation_ipa_ = [ipa.replace(':', 'ː') for ipa in pronunciation_ipa]
result = novoapi_functions.forced_alignment(wav_file, word, pronunciation_ipa_)
result_ipa, result_novo70, llh = novoapi_functions.result2pronunciation(result, word)
result_ = pd.Series([
sample['filename'],
sample['ipa'],
sample['word'],
sample['xsampa'],
sample['ipa'],
' '.join(result_ipa),
' '.join(result_novo70),
llh
@ -203,7 +225,7 @@ if forced_alignment_novo70:
results = results.append(result_, ignore_index = True)
print('{0}/{1}: answer {2} - prediction {3}'.format(
i+1, len(samples), result_['ipa'], result_['result_ipa']))
results.to_excel(os.path.join(reus_dir, 'results.xlsx'), encoding="utf-8")
if len(results) > 0:
Results = Results.append(results, ignore_index = True)
Results.to_excel(os.path.join(default.stimmen_dir, 'Results.xlsx'), encoding="utf-8")
@ -213,19 +235,18 @@ else:
## ===== analysis =====
result_novoapi_dir = os.path.join(default.stimmen_dir, 'result', 'novoapi')
for word in word_list:
if not word == 'Oog':
#word = 'Reus'
Results_ = Results[Results['word'] == word]
y_true = list(Results_['ipa'])
y_pred_ = [ipa.replace(' ', '') for ipa in list(Results_['result_ipa'])]
y_pred = [ipa.replace('ː', ':') for ipa in y_pred_]
pronunciation_variants = list(set(y_true))
cm = confusion_matrix(y_true, y_pred, labels=pronunciation_variants)
plt.figure()
output_confusion_matrix.plot_confusion_matrix(cm, pronunciation_variants, normalize=False)
#plt.show()
plt.savefig(os.path.join(result_novoapi_dir, word + '.png'))
#result_novoapi_dir = os.path.join(default.stimmen_dir, 'result', 'novoapi')
#for word in word_list:
# if not word == 'Oog':
# Results_ = Results[Results['word'] == word]
# y_true = list(Results_['ipa'])
# y_pred_ = [ipa.replace(' ', '') for ipa in list(Results_['result_ipa'])]
# y_pred = [ipa.replace('ː', ':') for ipa in y_pred_]
# pronunciation_variants = list(set(y_true))
# cm = confusion_matrix(y_true, y_pred, labels=pronunciation_variants)
# plt.figure()
# output_confusion_matrix.plot_confusion_matrix(cm, pronunciation_variants, normalize=False)
# #plt.show()
# plt.savefig(os.path.join(result_novoapi_dir, word + '.png'))

0
acoustic_model/how_to_use_ipa-xsampa_converer.txt → acoustic_model/convert_xsampa2ipa_readme.txt

9
acoustic_model/novoapi_functions.py

@ -6,6 +6,8 @@ import json
from novoapi.backend import session
import os
os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
import defaultfiles as default
@ -189,4 +191,9 @@ def result2pronunciation(result, word):
phones = result_[0]['phones']
pronunciation_novo70 = [phone['label'] for phone in phones]
pronunciation_ipa = [novo702ipa(phone) for phone in pronunciation_novo70]
return pronunciation_ipa, pronunciation_novo70, llh
return pronunciation_ipa, pronunciation_novo70, llh
# Bug fix: the original guard compared __name__ against 'main', which can
# never be true — the interpreter sets __name__ to '__main__' for a script
# run directly, so the smoke-test below was silently never executed.
if __name__ == '__main__':
    # Smoke-test: build a grammar for the word 'reus' from three IPA variants.
    pronunciation_ipa = ['rø:s', 'mɑn', 'mɑntsjə']
    grammar = make_grammar('reus', pronunciation_ipa)

1600
acoustic_model/script.txt
File diff suppressed because it is too large
View File

0
novoapi/__init__.py → novoapi_for_python3x/__init__.py

0
novoapi/asr/__init__.py → novoapi_for_python3x/asr/__init__.py

0
novoapi/asr/segments/__init__.py → novoapi_for_python3x/asr/segments/__init__.py

10
novoapi/asr/segments/praat.py → novoapi_for_python3x/asr/segments/praat.py

@ -28,7 +28,6 @@ def print_info_tier(output, title, begin, end, label):
print >> output, '\t\t\ttext = "%s"' % label
#def print_tier(output, title, begin, end, segs, (format, formatter)):
def print_tier(output, title, begin, end, segs, format, formatter):
print >> output, '\titem [%d]:' % 0
print >> output, '\t\tclass = "IntervalTier"'
@ -70,8 +69,11 @@ def seg2tg(fname, segments):
nr_tiers = 3
print_header(output, begin, end, nr_tiers)
print_tier(output, "confidence", begin, end, confidences, ('%.3f', lambda x: x))
print_tier(output, "words", begin, end, word_labels, ('%s', lambda x: x))
print_tier(output, "phones", begin, end, phones, ('%s', lambda x: x))
#print_tier(output, "confidence", begin, end, confidences, ('%.3f', lambda x: x))
#print_tier(output, "words", begin, end, word_labels, ('%s', lambda x: x))
#print_tier(output, "phones", begin, end, phones, ('%s', lambda x: x))
print_tier(output, "confidence", begin, end, confidences, '%.3f', lambda x: x)
print_tier(output, "words", begin, end, word_labels, '%s', lambda x: x)
print_tier(output, "phones", begin, end, phones, '%s', lambda x: x)
output.close()

0
novoapi/asr/segments/segments.py → novoapi_for_python3x/asr/segments/segments.py

0
novoapi/asr/spraaklab/__init__.py → novoapi_for_python3x/asr/spraaklab/__init__.py

2
novoapi/asr/spraaklab/schema.py → novoapi_for_python3x/asr/spraaklab/schema.py

@ -266,7 +266,7 @@ def test(data=None):
print("{0} validated not OK {1}".format(data, e.message))
else:
#print data, "validated OK"
print("{} validated OK".format(data))
print("{0} validated OK".format(data))
if __name__ == "__main__":

0
novoapi/backend/__init__.py → novoapi_for_python3x/backend/__init__.py

3
novoapi/backend/session.py → novoapi_for_python3x/backend/session.py

@ -188,7 +188,8 @@ class Recognizer(object):
nbytes_sent = 0
start = time.time()
for j in range(0, len(buf), buffer_size):
audio_packet = str(buf[j:j + buffer_size])
#audio_packet = str(buf[j:j + buffer_size])
audio_packet = buf[j:j + buffer_size]
nbytes_sent += len(audio_packet)
self.conn.send_binary(audio_packet)
self.conn.send(json.dumps({"jsonrpc": "2.0", "method": "get_result", "id": rpcid.next()}))

64
novoapi_for_python3x/readme

@ -0,0 +1,64 @@
novoapi( https://bitbucket.org/novolanguage/python-novo-api ) is written in Python 2.7.
To install it on Python 3.x, the following points should be modified.
- basestring --> str
- print xxx --> print("{}".format(xxx))
- import xxx --> from . import xxx
- except Exception, e --> except Exception as e
- remove tuple parameter unpacking from function signatures (no longer supported in Python 3, see PEP 3113).
Concretely...
=== novoapi\backend\__init__.py
#import session
from . import session
=== novoapi\backend\session.py
#except Exception, e:
except Exception as e:
#print self.last_message
print(self.last_message)
=== novoapi\asr\__init__.py
#import segments
#import spraaklab
from . import segments
from . import spraaklab
=== novoapi\asr\segments\praat.py
#print_tier(output, "confidence", begin, end, confidences, ('%.3f', lambda x: x))
#print_tier(output, "words", begin, end, word_labels, ('%s', lambda x: x))
#print_tier(output, "phones", begin, end, phones, ('%s', lambda x: x))
print_tier(output, "confidence", begin, end, confidences, '%.3f', lambda x: x)
print_tier(output, "words", begin, end, word_labels, '%s', lambda x: x)
print_tier(output, "phones", begin, end, phones, '%s', lambda x: x)
=== novoapi\asr\spraaklab\__init__.py ===
#import schema
from . import schema
=== novoapi\asr\spraaklab\schema.py ===
#if isinstance(object, basestring):
if isinstance(object, str):
except jsonschema.ValidationError as e:
#print data, "validated not OK", e.message
print("{0} validated not OK {1}".format(data, e.message))
else:
#print data, "validated OK"
print("{0} validated OK".format(data))
Then, to make it work correctly, a few more modifications are needed.
When a wav file is read using the wave module, the output (named buf) is a str of bytes on Python 2.7, whereas it is a bytes object on Python 3.6.
Therefore...
=== novoapi\backend\session.py
#audio_packet = str(buf[j:j + buffer_size])
audio_packet = buf[j:j + buffer_size]
Also, because of this difference, Segment.__repr__ (novoapi\asr\segments\segments.py) does not work.

0
novoapi/utils/json/__init__.py → novoapi_for_python3x/utils/json/__init__.py

119
reus-test/reus-test.py

@ -0,0 +1,119 @@
#!/usr/bin/env python
import os
os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
import argparse
import json
from novoapi.backend import session
p = argparse.ArgumentParser()
p.add_argument("--user", default='martijn.wieling')
p.add_argument("--password", default='xxxxx')
args = p.parse_args()
rec = session.Recognizer(grammar_version="1.0", lang="nl", snodeid=101, user=args.user, password=args.password, keepopen=True)
grammar = {
"type": "confusion_network",
"version": "1.0",
"data": {
"kind": "sequence",
"elements": [
{
"kind": "word",
"pronunciation": [
{
"phones": [
"r",
"eu0",
"s"
],
"id": 0
}
,
{
"phones": [
"m",
"a0",
"n"
],
"id": 1
}
,
{
"phones": [
"m",
"a0",
"n",
"t",
"s",
"y",
"ax"
],
"id": 2
}
],
"label": "reus"
}
]
},
"return_objects": [
"grammar"
],
"phoneset": "novo70"
}
res = rec.setgrammar(grammar)
#print "Set grammar result", res
## === novoapi/backend/session.py ===
#import wave
#import time
#from novoapi.backend.session import rpcid, segmentation
#wavf = "reus1008-reus.wav"
#w = wave.open(wavf, 'r')
#nchannels, sampwidth, framerate, nframes, comptype, compname = w.getparams()
#buf = w.readframes(nframes)
#w.close()
#buffer_size = 4096
#nbytes_sent = 0
#start = time.time()
#for j in range(0, len(buf), buffer_size):
# audio_packet = buf[j:j + buffer_size]
# nbytes_sent += len(audio_packet)
# rec.conn.send_binary(audio_packet)
#rec.conn.send(json.dumps({"jsonrpc": "2.0", "method": "get_result", "id": rpcid.next()}))
#print(rpcid.next())
#rec.last_message = rec.conn.recv()
#message = json.loads(rec.last_message)
#result = session.segmentation(message["result"]["words"])
#result.export()
## ====================================
def result2pronunciation(result, word):
    """Extract the recognized pronunciation of *word* from an exported result.

    Parameters
    ----------
    result : list of dict
        Exported recognition result (e.g. ``res.export()``); each entry is
        expected to carry at least the keys ``'label'``, ``'llh'`` and
        ``'phones'`` (a list of dicts with a ``'label'`` key).
        NOTE(review): schema assumed from usage here — confirm against the
        novoapi export format.
    word : str
        Word label to look up in *result*.

    Returns
    -------
    tuple(list of str, number)
        The phone labels of the first entry matching *word*, and that
        entry's log-likelihood.

    Raises
    ------
    IndexError
        If *word* does not occur in *result*.
    """
    # Iterate the entries directly instead of indexing via range(len(...)).
    matches = [entry for entry in result if entry['label'] == word]
    best = matches[0]
    pronunciation = [phone['label'] for phone in best['phones']]
    return pronunciation, best['llh']
res = rec.recognize_wav("reus1008-reus.wav")
#print "\n\n\nThe pronounced word in reus1008-reus.wav is: REUS\n\n"
#print "Recognition result:", json.dumps(res.export(), indent=4)
result2pronunciation(res.export(), 'reus')
#print "\n\n\nThe pronounced word in reus1167-man.wav is: MAN\n\n"
res2 = rec.recognize_wav("reus1167-man.wav")
#print "Recognition result:", json.dumps(res2.export(), indent=4)
result2pronunciation(res2.export(), 'reus')
#print "\n\n\nThe pronounced word in reus3768-mantsje.wav is: MANTSJE\n\n"
res3 = rec.recognize_wav("reus3768-mantsje.wav")
#print "Recognition result:", json.dumps(res3.export(), indent=4)
result2pronunciation(res3.export(), 'reus')

BIN
reus-test/reus1008-reus.wav

BIN
reus-test/reus1167-man.wav

BIN
reus-test/reus3768-mantsje.wav

Loading…
Cancel
Save