음성 인식 : CLOVA Speech Recognition (CSR) / CLOVA Voice

API/NAVER CLOVA

음성 인식 : CLOVA Speech Recognition (CSR) / CLOVA Voice - Premium

olli2 2022. 1. 26. 00:12

CLOVA Speech Recognition (CSR) : 음성 인식

사람의 목소리를 텍스트로 변환 | 음성을 텍스트로 변환 : STT(Speech-To-Text)
언어 선택 가능
Application에서 CLOVA Speech Recognition (CSR) 서비스 추가하여 사용

개발 가이드 확인

[ STT ]

https://api.ncloud-docs.com/docs/ai-naver-clovaspeechrecognition-stt

stt (Speech-To-Text) - CLOVA Speech Recognition(CSR)

api.ncloud-docs.com

[ TTS ]

https://api.ncloud-docs.com/docs/ai-naver-clovavoice-ttspremium

tts (Premium) - CLOVA Voice

api.ncloud-docs.com

CLOVA Speech Recognition (CSR) : 음성 인식 실습

(1) 결과를 콘솔에 출력

(2) 파일 업로드 / 추출된 텍스트 출력

(3) 언어 선택 <select name="language"> 추가

index.jsp

<a href="clovaSTTForm">Speech To Text</a><br><br>
<a href="clovaSTTForm2">Speech To Text2 : 언어 선택 추가</a><br><br>
<a href="clovaTTSForm">Text To Speech</a><br><br>

STTService

package com.ai.ex.service;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.Date;

import org.json.JSONObject;
import org.springframework.stereotype.Service;

/**
 * Speech-to-text service backed by the NAVER CLOVA Speech Recognition (CSR) REST API.
 *
 * <p>An audio file is posted to the CSR endpoint as a raw octet stream. On a 200 response the
 * recognized text is extracted from the JSON body, written to a timestamped text file under
 * {@code C:/upload/}, and returned to the caller.
 */
@Service
public class STTService {
	// NAVER Cloud Platform application credentials. Left blank on purpose: fill these in from
	// local configuration and never commit real values.
	private static final String CLIENT_ID = "";      // Application Client ID
	private static final String CLIENT_SECRET = "";  // Application Client Secret

	private static final String API_URL = "https://naveropenapi.apigw.ntruss.com/recog/v1/stt?lang=";
	// Language codes listed by the original sample: Kor, Jpn, Eng, Chn.
	private static final String DEFAULT_LANGUAGE = "Kor";
	private static final String RESULT_DIRECTORY = "C:/upload/";

	/**
	 * Recognizes speech in the given audio file using the default language ({@code "Kor"}).
	 *
	 * @param filePathName path of the audio file to send
	 * @return the recognized text, or an empty string when recognition fails
	 */
	public String clovaSpeechToText(String filePathName) {
		return clovaSpeechToText2(filePathName, DEFAULT_LANGUAGE);
	}

	/**
	 * Recognizes speech in the given audio file using the requested language.
	 *
	 * @param filePathName path of the audio file to send
	 * @param language     language code appended to the endpoint as the {@code lang} query parameter
	 * @return the recognized text, or an empty string when the call fails or the API reports an error
	 */
	public String clovaSpeechToText2(String filePathName, String language) {
		String result = "";

		try {
			File voiceFile = new File(filePathName);

			// The language may originate from a request parameter, so encode it before it goes
			// into the URL. (Fully qualified because this file does not import URLEncoder.)
			URL url = new URL(API_URL + java.net.URLEncoder.encode(language, "UTF-8"));

			HttpURLConnection conn = (HttpURLConnection) url.openConnection();
			conn.setUseCaches(false);
			conn.setDoOutput(true);
			conn.setDoInput(true);
			conn.setRequestProperty("Content-Type", "application/octet-stream");
			conn.setRequestProperty("X-NCP-APIGW-API-KEY-ID", CLIENT_ID);
			conn.setRequestProperty("X-NCP-APIGW-API-KEY", CLIENT_SECRET);

			// Open the file first so a missing file fails before any network traffic, and close
			// both streams even if the copy is interrupted.
			try (FileInputStream audioIn = new FileInputStream(voiceFile);
					OutputStream requestOut = conn.getOutputStream()) {
				byte[] buffer = new byte[4096];
				int bytesRead;
				while ((bytesRead = audioIn.read(buffer)) != -1) {
					requestOut.write(buffer, 0, bytesRead);
				}
				requestOut.flush();
			}

			int responseCode = conn.getResponseCode();
			boolean success = responseCode == 200;
			String body = readResponseBody(conn, success);

			if (success) {
				System.out.println(body);
				result = jsonToString(body);   // extract the recognized text from the JSON body
				resultToFileSave(result);      // keep a copy of the text on disk
			} else {
				// Report the API error instead of trying to parse it as a recognition result.
				System.out.println("error!!!!!!! responseCode= " + responseCode);
				System.out.println(body);
			}
		} catch (Exception e) {
			System.out.println(e);
		}

		return result;
	}

	/**
	 * Reads the whole response body as UTF-8 text.
	 *
	 * <p>For a non-success status the body is only available from the error stream; calling
	 * {@code getInputStream()} in that case throws an {@link IOException}.
	 *
	 * @param conn    connection whose response code has already been read
	 * @param success whether the response status was 200
	 * @return the body with line terminators removed, or an empty string when there is no error body
	 * @throws IOException if reading the stream fails
	 */
	private String readResponseBody(HttpURLConnection conn, boolean success) throws IOException {
		if (!success && conn.getErrorStream() == null) {
			return "";
		}

		StringBuilder body = new StringBuilder();
		try (BufferedReader reader = new BufferedReader(new InputStreamReader(
				success ? conn.getInputStream() : conn.getErrorStream(), "UTF-8"))) {
			String line;
			while ((line = reader.readLine()) != null) {
				body.append(line);
			}
		}
		return body.toString();
	}

	/**
	 * Extracts the {@code "text"} field from the JSON string returned by the API.
	 *
	 * @param jsonResultStr JSON response body
	 * @return the value of {@code "text"}, or an empty string if the input cannot be parsed or
	 *         has no such field
	 */
	public String jsonToString(String jsonResultStr) {
		String resultText = "";

		try {
			JSONObject jsonObj = new JSONObject(jsonResultStr);
			resultText = jsonObj.getString("text");
		} catch (Exception e) {
			e.printStackTrace();
		}

		return resultText;
	}

	/**
	 * Saves the recognized text to {@code C:/upload/stt_<epoch millis>.txt}.
	 *
	 * @param result text to write
	 */
	public void resultToFileSave(String result) {
		String filePathName = RESULT_DIRECTORY + "stt_" + System.currentTimeMillis() + ".txt";

		// try-with-resources closes the writer even when write() throws.
		try (FileWriter fw = new FileWriter(filePathName)) {
			fw.write(result);
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
}

APIController

@Autowired
private STTService sttService;

@Autowired
private TTSService ttsService;

/**
 * Speech-To-Text form page: handles {@code /clovaSTTForm}.
 *
 * @return the view name {@code "sttView"}
 */
@RequestMapping("/clovaSTTForm")
public String clovaSTTForm() {
    return "sttView";
}

/**
 * Speech-To-Text form page with language selection: handles {@code /clovaSTTForm2}.
 *
 * @return the view name {@code "sttView2"}
 */
@RequestMapping("/clovaSTTForm2")
public String clovaSTTForm2() {
    return "sttView2";
}

// TTS : 텍스트를 음성 파일로 변환
// (1) 결과를 upload 폴더에 mp3 파일로 저장
/*@RequestMapping("/clovaTTS")
public void clovaTTS() {
    ttsService.clovaTextToSpeech();
}*/

/**
 * Text-To-Speech form page: handles {@code /clovaTTSForm}.
 *
 * @return the view name {@code "ttsView"}
 */
@RequestMapping("/clovaTTSForm")
public String clovaTTSForm() {
    return "ttsView";
}

sttView.jsp

<%@ page language="java" contentType="text/html; charset=UTF-8"
    pageEncoding="UTF-8"%>
 <%@ taglib prefix="c" uri="http://java.sun.com/jsp/jstl/core" %>
<!DOCTYPE html>
<html>
	<head>
		<meta charset="UTF-8">
		<title>STT</title>
		<!-- jQuery plus the page script (stt.js), which handles the submit of #sttForm -->
		<script src="<c:url value='/js/jquery-3.6.0.min.js'/>"></script>
		<script src="<c:url value='/js/stt.js'/>"></script>		
		
	</head>
	<body>
		<!-- File upload form (no action attribute: stt.js intercepts the submit event) -->
		<h3>CSR : STT (음성을 텍스트로 변환)</h3>
		<form id="sttForm" enctype="multipart/form-data">
			파일 : <input type="file" id="uploadFile" name="uploadFile"> 
			<input type="submit" value="결과 확인">		
		</form>
		<br><br>
		
		<!-- Result output: stt.js writes the response text into #resultDiv -->	
		<h3>STT :  음성을 텍스트로 변환한 결과</h3> 	
		<div id="resultDiv"></div><br><br>
		
		<!-- Audio player: stt.js points its src at /voice/ plus the selected file name -->
		<div><audio preload="auto" controls></audio></div>
		
		<br><br>
		<a href="/">index 페이지로 이동</a>
		
	</body>
</html>

stt.js

/**
 * stt.js
 *
 * Submits the STT upload form (#sttForm) to "clovaSTT" with AJAX, shows the returned text in
 * #resultDiv, and points the <audio> player at the uploaded file.
 */

$(function(){

	$('#sttForm').on('submit', function(event){
		// Stay on the page; the upload is sent with AJAX instead of a normal form post.
		event.preventDefault();

		// The file input reports a path such as "C:\fakepath\name.mp3"; keep the last segment.
		var fileName = $('#uploadFile').val().split("\\").pop();
		if (!fileName) {
			alert("파일을 선택하세요.");
			return;
		}

		var formData = new FormData(this);

		$.ajax({
			url:"clovaSTT",
			type:"post",
			data:formData,
			processData: false,  // required: let the browser serialize the FormData
			contentType: false,  // required: let the browser set the multipart boundary
			success:function(result){
				$('#resultDiv').text(result);
				// Set the source only after the upload finished; before that the file is not
				// on the server yet and the player would load a missing resource.
				$('audio').prop("src", '/voice/' + encodeURIComponent(fileName));
			},
			error:function(xhr){
				// Concatenating the jqXHR object itself would print "[object Object]".
				alert("오류가 발생했습니다. (" + xhr.status + " " + xhr.statusText + ")");
			}
		});

	});
});

sttView2.jsp

<%@ page language="java" contentType="text/html; charset=UTF-8"
    pageEncoding="UTF-8"%>
 <%@ taglib prefix="c" uri="http://java.sun.com/jsp/jstl/core" %>
<!DOCTYPE html>
<html>
	<head>
		<meta charset="UTF-8">
		<title>STT2</title>
		<!-- jQuery plus the page script (stt2.js), which handles the submit of #sttForm -->
		<script src="<c:url value='/js/jquery-3.6.0.min.js'/>"></script>
		<script src="<c:url value='/js/stt2.js'/>"></script>		
		
	</head>
	<body>
		<!-- File upload form with a language selector (no action attribute: stt2.js intercepts the submit event) -->
		<h3>CSR : STT (음성을 텍스트로 변환)</h3>
		<form id="sttForm" enctype="multipart/form-data">
			파일 : <input type="file" id="uploadFile" name="uploadFile"><br><br>
			
			언어 : <select name="language">
						<option value="Kor">한국어</option>
						<option value="Eng">영어</option>
						<option value="Jpn">일본어</option>
						<option value="Chn">중국어</option>			
					</select><br><br>
			
			<input type="submit" value="결과 확인">		
		</form>
		<br><br>
		
		<!-- Result output: #resultDiv is the element meant to receive the recognized text -->	
		<h3>STT :  음성을 텍스트로 변환한 결과</h3> 	
		<div id="resultDiv"></div><br><br>
		
		<!-- Audio player for the uploaded voice file -->
		<div><audio preload="auto" controls></audio></div>
		
		<br><br>
		<a href="/">index 페이지로 이동</a>
		
	</body>
</html>

stt2.js

/**
 * stt2.js
 *
 * Submits the STT upload form (#sttForm, including its "language" select) to "clovaSTT2" with
 * AJAX, shows the returned text in #resultDiv, and points the <audio> player at the uploaded file.
 */

$(function(){

	$('#sttForm').on('submit', function(event){
		// Stay on the page; the upload is sent with AJAX instead of a normal form post.
		event.preventDefault();

		// The file input reports a path such as "C:\fakepath\name.mp3"; keep the last segment.
		var fileName = $('#uploadFile').val().split("\\").pop();
		if (!fileName) {
			alert("파일을 선택하세요.");
			return;
		}

		// FormData picks up every named field of the form, i.e. uploadFile and language.
		var formData = new FormData(this);

		$.ajax({
			url:"clovaSTT2",
			type:"post",
			data:formData,
			processData: false,  // required: let the browser serialize the FormData
			contentType: false,  // required: let the browser set the multipart boundary
			success:function(result){
				$('#resultDiv').text(result);
				// Set the source only after the upload finished; before that the file is not
				// on the server yet and the player would load a missing resource.
				$('audio').prop("src", '/voice/' + encodeURIComponent(fileName));
			},
			error:function(xhr){
				// Concatenating the jqXHR object itself would print "[object Object]".
				alert("오류가 발생했습니다. (" + xhr.status + " " + xhr.statusText + ")");
			}
		});

	});
});

APIRestController

// The STT handlers in this excerpt call sttService, so that is the dependency shown here.
@Autowired
private STTService sttService;

/**
 * Receives an uploaded audio file, stores it under {@code C:/upload/} and returns the text
 * recognized by {@code sttService.clovaSpeechToText}.
 *
 * @param file uploaded audio file (multipart field {@code uploadFile})
 * @return the recognized text, or an empty string when the upload is unusable or processing fails
 */
@RequestMapping("/clovaSTT")
	public String  clovaSTT(@RequestParam("uploadFile") MultipartFile file) {

		String result = "";

		try {
			// 1. Storage location: a directory outside the project.
			String uploadPath = "C:/upload/";

			// 2. The original file name is client-controlled. Keep only the part after the last
			//    path separator so names like "../../x" cannot escape the upload directory.
			String originalFileName = file.getOriginalFilename();
			if (file.isEmpty() || originalFileName == null) {
				return result;
			}
			int lastSeparator = Math.max(originalFileName.lastIndexOf('/'), originalFileName.lastIndexOf('\\'));
			String safeFileName = originalFileName.substring(lastSeparator + 1);
			if (safeFileName.isEmpty() || safeFileName.equals(".") || safeFileName.equals("..")) {
				return result;
			}
			String filePathName = uploadPath + safeFileName;

			// 3. Write the upload to the target file.
			File file1 = new File(filePathName);
			file.transferTo(file1);

			// 4. Hand the stored file to the STT service and keep the recognized text.
			result = sttService.clovaSpeechToText(filePathName);

		} catch (Exception e) {
			e.printStackTrace();
		}

		return result;
	}
	
	/**
	 * Receives an uploaded audio file and a language code, stores the file under
	 * {@code C:/upload/} and returns the text recognized by {@code sttService.clovaSpeechToText2}.
	 *
	 * @param file     uploaded audio file (multipart field {@code uploadFile})
	 * @param language language code chosen in the form (request parameter {@code language})
	 * @return the recognized text, or an empty string when the upload is unusable or processing fails
	 */
	@RequestMapping("/clovaSTT2")
	public String  clovaSTT2(@RequestParam("uploadFile") MultipartFile file,
											  @RequestParam("language") String language) {

		String result = "";

		try {
			// 1. Storage location: a directory outside the project.
			String uploadPath = "C:/upload/";

			// 2. The original file name is client-controlled. Keep only the part after the last
			//    path separator so names like "../../x" cannot escape the upload directory.
			String originalFileName = file.getOriginalFilename();
			if (file.isEmpty() || originalFileName == null) {
				return result;
			}
			int lastSeparator = Math.max(originalFileName.lastIndexOf('/'), originalFileName.lastIndexOf('\\'));
			String safeFileName = originalFileName.substring(lastSeparator + 1);
			if (safeFileName.isEmpty() || safeFileName.equals(".") || safeFileName.equals("..")) {
				return result;
			}
			String filePathName = uploadPath + safeFileName;

			// 3. Write the upload to the target file.
			File file1 = new File(filePathName);
			file.transferTo(file1);

			// 4. Hand the stored file and the language to the STT service and keep the text.
			result = sttService.clovaSpeechToText2(filePathName, language);

		} catch (Exception e) {
			e.printStackTrace();
		}

		return result;
	}

CLOVA Voice - Premium : 음성 합성 API 서비스

텍스트를 음성으로 변환 : TTS(Text-To-Speech)
텍스트 파일을 입력 받아서 변환된 음성 파일(mp3/wav) 반환
언어, 음색 선택 가능
CLOVA Voice - Premium 서비스 추가하여 사용

CLOVA Voice - Premium 실습

(1) 문자열 전송하고 결과를 mp3파일로 저장

(2) 파일 업로드 / 결과 mp3파일 <audio> 플레이

TTSService

package com.ai.ex.service;

import java.io.BufferedReader;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;
import java.util.Date;

import org.springframework.stereotype.Service;

/**
 * Text-to-speech service backed by the NAVER CLOVA Voice (Premium) REST API.
 *
 * <p>Text is posted to the TTS endpoint as form parameters and the returned MP3 stream is saved
 * under {@code C:/upload/}.
 */
@Service
public class TTSService {
	// NAVER Cloud Platform application credentials. Left blank on purpose: real keys must come
	// from local configuration and must never be committed to source control.
	private static final String CLIENT_ID = "";      // Application Client ID
	private static final String CLIENT_SECRET = "";  // Application Client Secret

	private static final String API_URL = "https://naveropenapi.apigw.ntruss.com/tts-premium/v1/tts";
	private static final String OUTPUT_DIRECTORY = "C:/upload/";

	/**
	 * Converts the fixed sample sentence to speech with the {@code clara} speaker and saves it as
	 * {@code C:/upload/<epoch millis>.mp3}.
	 */
	public void clovaTextToSpeech() {
		try {
			synthesize("만나서 반갑습니다.", "clara", "");
		} catch (Exception e) {
			System.out.println(e);
		}
	}

	/**
	 * Converts the contents of a text file to speech and saves the result as
	 * {@code C:/upload/tts_<epoch millis>.mp3}.
	 *
	 * @param filePathName path of the text file to read
	 * @param language     value sent as the {@code speaker} form parameter (the form passes a
	 *                     speaker name such as {@code nara} in this field)
	 * @return the name of the saved MP3 file, or an empty string when the file has no readable
	 *         text, the API reports an error, or saving fails
	 */
	public String clovaTextToSpeech2(String filePathName, String language) {
		String voiceFileName = "";

		try {
			String fileContents = fileRead(filePathName);
			if (!fileContents.isEmpty()) {
				voiceFileName = synthesize(fileContents, language, "tts_");
			}
		} catch (Exception e) {
			System.out.println(e);
		}

		return voiceFileName;
	}

	/**
	 * Sends one synthesis request and stores the returned audio.
	 *
	 * @param text           plain text to speak (URL-encoded here)
	 * @param speaker        speaker name (URL-encoded here)
	 * @param fileNamePrefix prefix placed before the timestamp in the output file name
	 * @return the saved file name, or an empty string when the API answers with a non-200 status
	 * @throws Exception if the request or the file write fails
	 */
	private String synthesize(String text, String speaker, String fileNamePrefix) throws Exception {
		// Both values end up in a form-encoded body, so both must be encoded.
		String postParams = "speaker=" + URLEncoder.encode(speaker, "UTF-8")
				+ "&volume=0&speed=0&pitch=0&format=mp3&text=" + URLEncoder.encode(text, "UTF-8");

		HttpURLConnection con = (HttpURLConnection) new URL(API_URL).openConnection();
		con.setRequestMethod("POST");
		con.setRequestProperty("X-NCP-APIGW-API-KEY-ID", CLIENT_ID);
		con.setRequestProperty("X-NCP-APIGW-API-KEY", CLIENT_SECRET);
		con.setDoOutput(true);

		try (DataOutputStream wr = new DataOutputStream(con.getOutputStream())) {
			wr.writeBytes(postParams);
			wr.flush();
		}

		int responseCode = con.getResponseCode();
		if (responseCode != 200) {
			printErrorBody(con);
			return "";
		}

		// The name is derived from the current time in milliseconds.
		String voiceFileName = fileNamePrefix + System.currentTimeMillis() + ".mp3";
		File target = new File(OUTPUT_DIRECTORY + voiceFileName);

		// Close both streams even if the copy fails, so the output file is not left locked.
		try (InputStream is = con.getInputStream();
				OutputStream outputStream = new FileOutputStream(target)) {
			byte[] bytes = new byte[1024];
			int read;
			while ((read = is.read(bytes)) != -1) {
				outputStream.write(bytes, 0, read);
			}
		}

		return voiceFileName;
	}

	/**
	 * Prints the error body of a failed response to standard output.
	 *
	 * @param con connection whose response code has already been read
	 * @throws Exception if reading the error stream fails
	 */
	private void printErrorBody(HttpURLConnection con) throws Exception {
		InputStream errorStream = con.getErrorStream();
		if (errorStream == null) {
			// Nothing was sent with the error response.
			return;
		}

		StringBuilder response = new StringBuilder();
		try (BufferedReader br = new BufferedReader(new InputStreamReader(errorStream, "UTF-8"))) {
			String inputLine;
			while ((inputLine = br.readLine()) != null) {
				response.append(inputLine);
			}
		}
		System.out.println(response.toString());
	}

	/**
	 * Reads a text file and returns its contents as one string.
	 *
	 * <p>Lines are joined with a single space so that words on adjacent lines do not run together.
	 * The file is decoded with the platform default charset, as {@link FileReader} does.
	 *
	 * @param filePathName path of the text file
	 * @return the text read so far; empty when the file cannot be opened
	 */
	public String fileRead(String filePathName) {
		StringBuilder contents = new StringBuilder();

		try (BufferedReader br = new BufferedReader(new FileReader(new File(filePathName)))) {
			String line;
			while ((line = br.readLine()) != null) {
				if (contents.length() > 0) {
					contents.append(' ');
				}
				contents.append(line);
			}
		} catch (Exception e) {
			e.printStackTrace();
		}

		String result = contents.toString();
		System.out.println(result); // print for verification
		return result;
	}
}

ttsView.jsp

<%@ page language="java" contentType="text/html; charset=UTF-8"
    pageEncoding="UTF-8"%>
 <%@ taglib prefix="c" uri="http://java.sun.com/jsp/jstl/core" %>
<!DOCTYPE html>
<html>
	<head>
		<meta charset="UTF-8">
		<title>TTS</title>
		<!-- jQuery plus the page script (tts.js), which handles the submit of #ttsForm -->
		<script src="<c:url value='/js/jquery-3.6.0.min.js'/>"></script>
		<script src="<c:url value='/js/tts.js'/>"></script>

	</head>
	<body>
		<!-- Text file upload form with a speaker selector (no action attribute: tts.js intercepts the submit event) -->
		<h3>TTS : 텍스트를 음성으로 변환</h3>
		<form id="ttsForm" enctype="multipart/form-data">
			파일 : <input type="file" id="uploadFile" name="uploadFile"><br><br>

			언어 : <select name="language">
						<option value="nara">한국어, 여성</option>
						<option value="jinho">한국어, 남성</option>
						<option value="nhajun">한국어, 아동(남)</option>
						<option value="ndain">한국어, 아동(여)</option>
						<option value="clara">영어, 여성</option>
						<option value="matt">영어, 남성</option>
						<option value="shinji">일본어, 남성</option>
						<option value="carmen">스페인어, 여성</option>
						<option value="meimei">중국어, 여성</option>
					</select><br><br>

			<input type="submit" value="결과 확인">
		</form>
		<br><br>

		<!-- Result output: tts.js writes the saved voice file name into #resultDiv -->
		<h3>TTS : 텍스트를 음성으로 변환한 결과</h3>
		<div id="resultDiv"></div><br><br>

		<!-- Audio player: tts.js points its src at /voice/ plus the returned file name -->
		<div><audio preload="auto" controls></audio></div>

		<br><br>
		<a href="/">index 페이지로 이동</a>

	</body>
</html>

tts.js

/**
 * tts.js
 *
 * Submits the TTS upload form (#ttsForm: a text file plus the "language" select) to "clovaTTS"
 * with AJAX. The response is the name of the generated voice file, which is shown in #resultDiv
 * and loaded into the <audio> player.
 */

$(function(){

	$('#ttsForm').on('submit', function(event){
		// Stay on the page; the upload is sent with AJAX instead of a normal form post.
		event.preventDefault();

		if (!$('#uploadFile').val()) {
			alert("파일을 선택하세요.");
			return;
		}

		var formData = new FormData(this);

		$.ajax({
			url:"clovaTTS",
			type:"post",
			data:formData,
			processData: false,  // required: let the browser serialize the FormData
			contentType: false,  // required: let the browser set the multipart boundary
			success:function(result){
				$('#resultDiv').text(result);   // show the saved voice file name
				// An empty response means no file was produced; do not load "/voice/" itself.
				if (result) {
					$('audio').prop("src", '/voice/' + encodeURIComponent(result));
				}
			},
			error:function(xhr){
				// Concatenating the jqXHR object itself would print "[object Object]".
				alert("오류가 발생했습니다. (" + xhr.status + " " + xhr.statusText + ")");
			}
		});

	});
});

APIRestController

// The TTS handler in this excerpt calls ttsService, so that is the dependency shown here.
@Autowired
private TTSService ttsService;

// Text-To-Speech: text file upload with speaker selection
		/**
		 * Receives an uploaded text file and a speaker value, stores the file under
		 * {@code C:/upload/} and returns the file name produced by
		 * {@code ttsService.clovaTextToSpeech2}.
		 *
		 * @param file     uploaded text file (multipart field {@code uploadFile})
		 * @param language value of the form's {@code language} select, passed on to the service
		 * @return the name of the generated voice file, or an empty string when the upload is
		 *         unusable or processing fails
		 */
		@RequestMapping("/clovaTTS")
		public String  clovaTTS(@RequestParam("uploadFile") MultipartFile file,
												  @RequestParam("language") String language) {

			String result = "";

			try {
				// 1. Storage location: a directory outside the project.
				String uploadPath = "C:/upload/";

				// 2. The original file name is client-controlled. Keep only the part after the
				//    last path separator so names like "../../x" cannot escape the upload directory.
				String originalFileName = file.getOriginalFilename();
				if (file.isEmpty() || originalFileName == null) {
					return result;
				}
				int lastSeparator = Math.max(originalFileName.lastIndexOf('/'), originalFileName.lastIndexOf('\\'));
				String safeFileName = originalFileName.substring(lastSeparator + 1);
				if (safeFileName.isEmpty() || safeFileName.equals(".") || safeFileName.equals("..")) {
					return result;
				}
				String filePathName = uploadPath + safeFileName;

				// 3. Write the upload to the target file.
				File file1 = new File(filePathName);
				file.transferTo(file1);

				// 4. Hand the stored file to the TTS service and keep the saved voice file name.
				result = ttsService.clovaTextToSpeech2(filePathName, language);

			} catch (Exception e) {
				e.printStackTrace();
			}

			return result;
		}

저작자표시 비영리 변경금지