miyehn-speech

http://miyehn.me/p/speech

It’s my first time working with speech input, so everything was new to me. My notes show how much I struggled to come up with ideas… The only thing I knew I wanted from the beginning was speech input close to natural language, because otherwise calling out random words that are obviously commands feels a bit awkward in front of people who don’t know what’s going on.

Click to see my note

I went with the last idea, which Claire helped me brainstorm. I like working with visuals, so integrating speech with visuals sounded fun to me. The idea is to require the user to speak as if he/she is talking to characters instead of to a computer (so please be nice 🙂).

There are actually a lot more things that one can ask the elements to do than the ones shown in the demo. Each spoken sentence is categorized by its keywords into one of three types:

  • selection (which element(s) do you want to interact with?)
    • greet them first, like “hi” or “hey”
    • can select as many as one wants
    • can select by type (firefly / sun / star), color, quantifier + direction (the leftmost star, the 2nd and 4th sun from the top, etc.), or just everyone
  • instruction
    • use nice words like “please” or “can you”
    • move toward a certain direction – this can also be quantified. Simply moving is different from moving a little or a tiny bit, etc.
    • grow and shrink
    • change color
  • end conversation (clear selection)
    • either tell the elements they did well (“nice! Great!”)
    • or say something like (“nevermind”) to start over.

Since all the interactions are recognized by keywords (yes, I was inspired by the installation about natural language processing in the Gates Center), theoretically infinitely many sentences can be recognized. After playing around for a while, hopefully the user can forget about the hardcoded keywords and just speak naturally and still be recognized. I kind of reached that, which feels nice.

Below is my code.

 

// The p5.SpeechRec instance; (re)created by initializeMySpeechRecognizer().
var mySpeechRecognizer;
//Thanks to Golan for his p5 speech template!
// Every element in the scene (fireflies, suns and stars); filled in setup().
var elemList;
 
// The elements currently selected by the speaker (their `attention` flag is set).
var filtered;
// The part of the sentence following a polite keyword; parsed as the instruction.
var cmd = '';
 
// Selection criteria extracted from a greeting sentence by parseResult():
// 1-based ordinal positions to pick ("second", "4th", ...); set to [1] in setup().
var a_num;
// How many elements to pick ("two", "three", ...); values >= 2 take precedence over a_num.
var a_quantifier = 1;
// Element type to pick: 'sun', 'star' or 'firefly'.
var a_identifier = '';
// Edge to count from: 'top', 'bottom', 'left' or 'right'.
var a_direction = '';
// Color name to filter by ('red', 'orange', ...); '' means no color filter.
var a_col = '';
// True when the greeting addressed "everyone"/"everybody" (all elements of any type).
var everyone = false;
// True when the greeting asked for "all" elements of the chosen type.
var all = false;
 
//=========================================
/**
 * p5 setup hook: prepares the 480x480 canvas and HSB color mode, starts
 * speech recognition, then populates the scene with 5 fireflies and 5 suns
 * (interleaved) followed by 10 stars, and resets the selection state.
 */
function setup() {
	createCanvas(480, 480);
	noStroke();
	colorMode(HSB, 360, 100, 100, 100);
	initializeMySpeechRecognizer();

	filtered = [];
	elemList = [];

	// Fireflies and suns are created in alternating order.
	let pairsLeft = 5;
	while (pairsLeft > 0) {
		elemList.push(new Firefly());
		elemList.push(new Sun());
		pairsLeft -= 1;
	}

	let starsLeft = 10;
	while (starsLeft > 0) {
		elemList.push(new Star());
		starsLeft -= 1;
	}

	// By default an ordinal selection picks the first matching element.
	a_num = [1];
}
 
/** p5 draw hook: clears the canvas, then advances and paints every element. */
function draw() {
	background(20);
	for (const elem of elemList) {
		elem.update();
		elem.display();
	}
}

//=========================================
/**
 * Creates a new p5.SpeechRec recognizer for 'en-US', registers parseResult
 * as its result callback and starts it.
 */
function initializeMySpeechRecognizer() {
	mySpeechRecognizer = new p5.SpeechRec('en-US');

	// These are important settings to experiment with
	mySpeechRecognizer.continuous = true; // Do continuous recognition
	mySpeechRecognizer.interimResults = false; // No partial results (enabling them is faster, less accurate)
	mySpeechRecognizer.onResult = parseResult; // The speech recognition callback function
	mySpeechRecognizer.start(); // Start the recognition engine. Requires an internet connection!
}

//=========================================
/** Re-initializes speech recognition when the space bar is pressed. */
function keyPressed() {
	if (key === ' ') {
		initializeMySpeechRecognizer();
	}
}

//=========================================
/**
 * Removes the first occurrence of `element` from `array`, in place.
 * Does nothing when the element is absent (previously splice(-1, 1) removed
 * the LAST element in that case).
 *
 * @param {Array} array - array to modify
 * @param {*} element - value to remove
 */
function rm(array, element) {
	const index = array.indexOf(element);
	if (index !== -1) {
		array.splice(index, 1);
	}
}

/** Restores every selection criterion (a_num ... all) to its initial value. */
function resetSelectionCriteria() {
	a_num = [1];
	a_quantifier = 1;
	a_identifier = '';
	a_direction = '';
	a_col = '';
	everyone = false;
	all = false;
}

/**
 * Extracts the selection criteria from a lower-cased greeting sentence and
 * stores them in the a_* / everyone / all globals.
 *
 * @param {string} s - lower-cased recognized sentence
 */
function parseSelectionCriteria(s) {
	if (/everyone/.test(s) || /everybody/.test(s)) {
		everyone = true;
		return;
	}

	// Pad with spaces so whole-word patterns also match the first and last word.
	const a = ' ' + s + ' ';
	const hasWord = (words) => words.some((word) => a.includes(' ' + word + ' '));

	// Ordinals: every one that is mentioned replaces the default "first".
	const ordinals = [
		[2, ['second', '2nd']],
		[3, ['third', '3rd']],
		[4, ['fourth', '4th']],
		[5, ['fifth', '5th']],
	];
	for (const [position, words] of ordinals) {
		if (hasWord(words)) {
			rm(a_num, 1);
			a_num.push(position);
		}
	}

	if (/top/.test(a)) a_direction = 'top';
	else if (/bottom/.test(a)) a_direction = 'bottom';
	else if (/left/.test(a)) a_direction = 'left';
	else if (/right/.test(a)) a_direction = 'right';

	// The word lists include transcriptions the recognizer commonly produces
	// instead of the intended word ("son"/"song" for sun, "tar"/"our" for star).
	if (hasWord(['son', 'sons', 'song', 'songs', 'sun', 'suns'])) {
		a_identifier = 'sun';
	} else if (hasWord(['tar', 'tars', 'our', 'ours', 'star', 'stars'])) {
		a_identifier = 'star';
	} else {
		// "firefly" / "fireflies" / "spotify", and also the fallback type.
		a_identifier = 'firefly';
	}

	if (/all/.test(s) || hasWord(['awe'])) all = true;

	const color = ['red', 'orange', 'yellow', 'green', 'blue', 'purple']
		.find((name) => hasWord([name]));
	if (color !== undefined) a_col = color;

	if (hasWord(['two', '2'])) a_quantifier = 2;
	else if (hasWord(['three', '3'])) a_quantifier = 3;
	else if (hasWord(['four', '4', 'for'])) a_quantifier = 4;
	else if (hasWord(['five', '5'])) a_quantifier = 5;
}

/**
 * Resolves the current selection criteria against elemList.
 *
 * @returns {Array} the matching elements; may contain `undefined` entries
 *   when an ordinal points past the end of the candidates.
 */
function selectElements() {
	if (everyone) {
		// Copy so that the selection never aliases (and later mutates) elemList.
		return elemList.slice();
	}

	// filter() returns a new array, so sorting below does not reorder elemList.
	const sameType = elemList.filter((item) => item.type === a_identifier);
	if (all) {
		return sameType;
	}
	if (a_col !== '') {
		return sameType.filter((item) => item.col === a_col);
	}

	// Comparators must return a number; the element nearest the named edge comes first.
	const comparators = {
		top: (p, q) => p.y - q.y,
		left: (p, q) => p.x - q.x,
		bottom: (p, q) => q.y - p.y,
		right: (p, q) => q.x - p.x,
	};
	const comparator = comparators[a_direction];
	if (comparator !== undefined) {
		sameType.sort(comparator);
	}

	if (a_quantifier >= 2) {
		return sameType.slice(0, a_quantifier);
	}
	return a_num.map((position) => sameType[position - 1]);
}

/**
 * Adds elements to the current selection, skipping missing entries and
 * elements that are already selected.
 *
 * @param {Array} candidates - elements to add (may contain `undefined`)
 */
function addToSelection(candidates) {
	for (const item of candidates) {
		if (item !== undefined && !filtered.includes(item)) {
			filtered.push(item);
		}
	}
}

/**
 * Interprets an instruction and applies it to every selected element:
 * movement (optionally "a little"/"a tiny bit"), grow / shrink, or a color change.
 *
 * @param {string} command - lower-cased text following the polite keyword
 */
function runCommand(command) {
	const mentions = (...words) => words.some((word) => command.includes(word));

	if (mentions('move', 'moving', 'go')) {
		const direction = ['up', 'down', 'left', 'right'].find((name) => command.includes(name));
		if (direction === undefined) {
			return;
		}
		// "tiny" only scales the move down when combined with "little" or "bit".
		let amount = 1;
		if (mentions('little', 'bit')) {
			amount = mentions('tiny') ? 0.25 : 0.5;
		}
		filtered.forEach((item) => item.move(direction, amount));
	} else if (mentions('big', 'large', 'grow')) {
		filtered.forEach((item) => item.grow());
	} else if (mentions('small', 'shrink')) {
		filtered.forEach((item) => item.shrink());
	} else if (mentions('turn', 'change')) {
		const color = ['red', 'orange', 'yellow', 'green', 'blue', 'purple']
			.find((name) => command.includes(name));
		if (color !== undefined) {
			filtered.forEach((item) => item.changeColor(color));
		}
	}
}

/**
 * Speech recognition callback. Classifies the recognized sentence by keyword:
 *   - praise / "nevermind": clears the selection and all criteria;
 *   - "can you" / "please" / "maybe": the rest of the sentence is stored in
 *     `cmd` and executed once on the current selection;
 *   - "hey" / "hello" / "hi": parses fresh selection criteria and adds the
 *     matching elements to the selection.
 * Sentences matching none of these are ignored.
 */
function parseResult() {
	const s = mySpeechRecognizer.resultString.toLowerCase();
	console.log(s);

	const praiseWords = ['nice', 'well done', 'good', 'great', 'perfect', 'nevermind', 'cool'];
	const politeWords = ['can you', 'please', 'maybe'];
	const greetingWords = ['hey', 'hello', 'hi'];

	if (praiseWords.some((word) => s.includes(word))) {
		resetSelectionCriteria();
		filtered.forEach((item) => { item.attention = false; });
		filtered = [];
		cmd = '';
		return;
	}

	const politeWord = politeWords.find((word) => s.includes(word));
	if (politeWord !== undefined) {
		cmd = s.substring(s.indexOf(politeWord) + politeWord.length);
		// Run the instruction only for the sentence that issued it, so a stale
		// command is not repeated on every later utterance.
		if (cmd !== '') {
			runCommand(cmd);
		}
		return;
	}

	if (greetingWords.some((word) => s.includes(word))) {
		// Each greeting starts from clean criteria so that e.g. a color named
		// in an earlier greeting does not silently filter this one.
		resetSelectionCriteria();
		parseSelectionCriteria(s);
		addToSelection(selectElements());
		filtered.forEach((item) => { item.attention = true; });
	}
}
 
/**
 * Base class for everything in the scene. Each element has a current state
 * (h, s, b, size, x, y) that eases toward a target state (th, ts, tb, tsize,
 * tx, ty) on every update(); the c* fields hold the most recent
 * target-minus-current differences.
 */
class Element {
	/** Picks a random palette color and a random position on the canvas. */
	constructor() {
		const colorNames = ['red', 'orange', 'yellow', 'green', 'blue', 'purple'];
		const colorHues = [0, 24, 56, 124, 210, 266];

		const pick = Math.floor(random(6));
		const hue = colorHues[pick];
		const saturation = random(60, 80);
		const brightness = random(75, 95);
		const startX = random(width);
		const startY = random(height);

		Object.assign(this, {
			col: colorNames[pick],
			// current state
			h: hue,
			s: saturation,
			b: brightness,
			size: 50,
			x: startX,
			y: startY,
			r: 0.05,
			// last computed differences between target and current state
			ch: 0,
			cs: 0,
			cb: 0,
			csize: 0,
			cx: 0,
			cy: 0,
			// target state
			th: hue,
			ts: saturation,
			tb: brightness,
			tsize: 50,
			tx: startX,
			ty: startY,
			asleep: false,
			attention: false,
		});
	}

	/**
	 * Sets the target position 80*amt pixels away from the current position.
	 * @param {string} dir - 'left', 'right', 'up' or 'down'; anything else is ignored
	 * @param {number} amt - fraction of the 80 pixel step
	 */
	move(dir, amt) {
		const distance = 80 * amt;
		switch (dir) {
			case 'left':
				this.tx = this.x - distance;
				break;
			case 'right':
				this.tx = this.x + distance;
				break;
			case 'up':
				this.ty = this.y - distance;
				break;
			case 'down':
				this.ty = this.y + distance;
				break;
			default:
				break;
		}
	}

	/**
	 * Sets the target hue and the color name; unknown names are ignored.
	 * @param {string} col - one of the six palette color names
	 */
	changeColor(col) {
		const palette = [
			['red', 0],
			['orange', 24],
			['yellow', 56],
			['green', 124],
			['blue', 210],
			['purple', 266],
		];
		const entry = palette.find(([name]) => name === col);
		if (entry === undefined) {
			return;
		}
		const [name, hue] = entry;
		this.th = hue;
		this.col = name;
	}

	/** Sets the target size to 1.5x the current size (elements of type 'star' are left alone). */
	grow() {
		if (this.type == 'star') {
			return;
		}
		this.tsize = this.size * 1.5;
	}

	/** Sets the target size to 0.75x the current size (elements of type 'star' are left alone). */
	shrink() {
		if (this.type == 'star') {
			return;
		}
		this.tsize = this.size * 0.75;
	}

	/** Moves every current value 5% of the way toward its target value. */
	update() {
		const easing = 0.05;
		// [current, target, difference] field names for each animated channel
		const channels = [
			['h', 'th', 'ch'],
			['s', 'ts', 'cs'],
			['b', 'tb', 'cb'],
			['size', 'tsize', 'csize'],
			['x', 'tx', 'cx'],
			['y', 'ty', 'cy'],
		];
		for (const [current, target, difference] of channels) {
			this[difference] = this[target] - this[current];
			this[current] += this[difference] * easing;
		}
	}
}
 
/** A glowing element: a faint layered halo of diameter `size` around a small bright core. */
class Firefly extends Element {
	constructor() {
		super();
		this.type = 'firefly';
		this.size = 100;
		this.tsize = 100;
	}

	/** Draws the halo, the core and, when selected, a '!' marker next to it. */
	display() {
		const { x, y, h, s, b } = this;

		// Halo: nearly transparent discs stacked from the outside in, so the
		// center (covered by every disc) ends up brightest.
		fill(h, s, b, 1);
		let diameter = this.size;
		while (diameter > 0) {
			ellipse(x, y, diameter, diameter);
			diameter -= 2;
		}

		// Core
		fill(h, s, b, 80);
		ellipse(x, y, 6, 6);

		if (!this.attention) {
			return;
		}
		fill(0, 0, 100);
		text('!', x + 4, y);
	}
}
 
/** A solid disc drawn in the element's own color. */
class Sun extends Element {
	constructor() {
		super();
		this.type = 'sun';
		this.size = 50;
		this.tsize = 50;
	}

	/** Draws the disc and, when selected, a '!' marker next to it. */
	display() {
		const { x, y, size: diameter } = this;

		fill(this.h, this.s, this.b);
		ellipse(x, y, diameter, diameter);

		if (!this.attention) {
			return;
		}
		fill(0, 0, 100);
		text('!', x + 4, y);
	}
}
 
/** A tiny dot that is always filled with (0, 0, 100), regardless of the element's own color. */
class Star extends Element {
	constructor() {
		super();
		this.type = 'star';
		this.size = 3;
		this.tsize = 3;
	}

	/** Draws the dot and, when selected, a '!' marker next to it. */
	display() {
		const { x, y, size: diameter } = this;

		fill(0, 0, 100);
		ellipse(x, y, diameter, diameter);

		if (!this.attention) {
			return;
		}
		fill(0, 0, 100);
		text('!', x + 4, y);
	}
}