You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

119 lines
3.0 KiB

#!/usr/bin/env python
import os
os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
import argparse
import json
from novoapi.backend import session
p = argparse.ArgumentParser()
p.add_argument("--user", default='martijn.wieling')
p.add_argument("--password", default='xxxxx')
args = p.parse_args()
rec = session.Recognizer(grammar_version="1.0", lang="nl", snodeid=101, user=args.user, password=args.password, keepopen=True)
grammar = {
"type": "confusion_network",
"version": "1.0",
"data": {
"kind": "sequence",
"elements": [
{
"kind": "word",
"pronunciation": [
{
"phones": [
"r",
"eu0",
"s"
],
"id": 0
}
,
{
"phones": [
"m",
"a0",
"n"
],
"id": 1
}
,
{
"phones": [
"m",
"a0",
"n",
"t",
"s",
"y",
"ax"
],
"id": 2
}
],
"label": "reus"
}
]
},
"return_objects": [
"grammar"
],
"phoneset": "novo70"
}
res = rec.setgrammar(grammar)
#print "Set grammar result", res
## === novoapi/backend/session.py ===
#import wave
#import time
#from novoapi.backend.session import rpcid, segmentation
#wavf = "reus1008-reus.wav"
#w = wave.open(wavf, 'r')
#nchannels, sampwidth, framerate, nframes, comptype, compname = w.getparams()
#buf = w.readframes(nframes)
#w.close()
#buffer_size = 4096
#nbytes_sent = 0
#start = time.time()
#for j in range(0, len(buf), buffer_size):
# audio_packet = buf[j:j + buffer_size]
# nbytes_sent += len(audio_packet)
# rec.conn.send_binary(audio_packet)
#rec.conn.send(json.dumps({"jsonrpc": "2.0", "method": "get_result", "id": rpcid.next()}))
#print(rpcid.next())
#rec.last_message = rec.conn.recv()
#message = json.loads(rec.last_message)
#result = session.segmentation(message["result"]["words"])
#result.export()
## ====================================
def result2pronunciation(result, word):
#result_ = res.export()[1]
result_ = [result[i] for i in range(len(result)) if result[i]['label'] == word]
llh = result_[0]['llh']
phones = result_[0]['phones']
pronunciation = [phone['label'] for phone in phones]
return pronunciation, llh
res = rec.recognize_wav("reus1008-reus.wav")
#print "\n\n\nThe pronounced word in reus1008-reus.wav is: REUS\n\n"
#print "Recognition result:", json.dumps(res.export(), indent=4)
result2pronunciation(res.export(), 'reus')
#print "\n\n\nThe pronounced word in reus1167-man.wav is: MAN\n\n"
res2 = rec.recognize_wav("reus1167-man.wav")
#print "Recognition result:", json.dumps(res2.export(), indent=4)
result2pronunciation(res2.export(), 'reus')
#print "\n\n\nThe pronounced word in reus3768-mantsje.wav is: MANTSJE\n\n"
res3 = rec.recognize_wav("reus3768-mantsje.wav")
#print "Recognition result:", json.dumps(res3.export(), indent=4)
result2pronunciation(res3.export(), 'reus')