Compare commits

...

3 Commits

Author    SHA1        Message              Date
artieyue  491e52eb81  10.7 upload          2022-10-07 18:35:00 +08:00
artieyue  a1fa4d8a31  ADD file via upload  2022-10-07 18:34:31 +08:00
artieyue  6cfca9f940  10.7 upload          2022-10-07 18:33:27 +08:00
3 changed files with 440 additions and 0 deletions

View File

@@ -0,0 +1,122 @@
# -*- coding: utf-8 -*-
import os

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from scipy.cluster.hierarchy import dendrogram, fcluster, linkage, set_link_color_palette

filePath = "file1"  # input folder
bigname = 111  # unused
def os_file(path):  # iterate over the files in the input folder
    filenames = os.listdir(path)
    for filename in filenames:
        print(filename)
        filename1 = filename[0:-5]  # strip the file extension (assumes a 5-character suffix such as ".xlsx")
        print(filename1)
        if filename != ".DS_Store":
            file_reader(filePath + "/" + filename, filename1)
def file_reader(path, name):
    df1 = pd.read_excel(path, sheet_name="Sheet1")  # the input data must live in "Sheet1"
    df1 = np.array(df1)
    importance = df1[-1, :]  # every column of the last row (the weights)
    importance = np.delete(importance, 0, axis=0)
    df1 = df1[0:-1, :]  # all data except the weight row
    # separate the words from the numeric scores
    word = df1[:, 0]  # every row of the first column (the words)
    data = np.delete(df1, 0, axis=1)  # all columns except the first (the scores)
    data = data * importance  # multiply the scores by the weights
    nums, indics = hierarchy_cluster(data, word, name)
    print(indics)
    for i in range(len(indics)):
        group = "form one cluster"
        for j in range(len(indics[i])):
            group = word[indics[i][j]] + " " + group
        print(group)
def hierarchy_cluster(data, word, name, method='complete', threshold=600.0):  # complete-linkage
    '''Hierarchical clustering.
    Arguments:
        data [[0, float, ...], [float, 0, ...]] -- distance between document i and document j
    Keyword Arguments:
        method {str} -- linkage method: single, complete, average, centroid, median, ward (default: {'complete'})
        threshold {float} -- distance threshold between clusters
    Returns:
        cluster_number int -- number of clusters
        cluster [[idx1, idx2, ...], [idx3]] -- indices belonging to each cluster
    '''
    data = np.array(data)
    plt.figure(figsize=(10, 15), dpi=300)  # width and height in inches; dpi is the resolution
    Z = linkage(data, method=method, metric='euclidean')  # Euclidean distance
    cluster_assignments = fcluster(Z, threshold, criterion='distance')
    num_clusters = cluster_assignments.max()
    indices = get_cluster_indices(cluster_assignments)
    # a separate ward linkage is used only for drawing the dendrogram
    z = linkage(data, method='ward')
    print(z.shape)
    # set the link colour palette before drawing so the dendrogram picks it up
    set_link_color_palette(['#0000FF', '#4A766E', '#2F4F4F', '#871F78',
                            '#FF7F00', '#E47833', '#FF6666', '#FFCCFF'])
    # color_threshold sets where the colouring cut is drawn; orientation='left'
    # flips the figure; leaf_font_size controls the label spacing
    dendrogram(z, labels=word, color_threshold=80, orientation='right',
               leaf_font_size=8, above_threshold_color='black')
    plt.grid(True, which='minor', ls='--')  # 'minor' hides the grid lines here; 'major' would show them
    plt.title(name, fontdict={'fontproperties': 'Times New Roman', 'size': 10})  # title font and size
    plt.yticks(fontproperties='Times New Roman', size=8)  # y-axis font and size
    plt.xticks(fontproperties='Times New Roman', size=8)
    f = plt.gcf()
    f.savefig(name + ".png")
    plt.show()
    f.clear()
    return num_clusters, indices
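# A note on the clustering cut (a sketch of the semantics, assuming this
# file's defaults): with method='complete' and criterion='distance',
# fcluster(Z, 600.0) cuts the tree so that no two members of a cluster are
# more than 600 apart in the weighted-score space; raising the threshold
# merges clusters, lowering it splits them.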
def get_cluster_indices(cluster_assignments):  # helper for the hierarchical clustering
    '''Map each cluster back to the indices of the original data.
    Arguments:
        cluster_assignments -- the result of the hierarchical clustering
    Returns:
        [[idx1, idx2, ...], [idx3]] -- indices belonging to each cluster
    '''
    n = cluster_assignments.max()
    indices = []
    for cluster_number in range(1, n + 1):
        indices.append(np.where(cluster_assignments == cluster_number)[0])
    return indices
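# Expected input layout, as inferred from file_reader() above (an assumption,
# not verified against the original data): each .xlsx under filePath holds a
# "Sheet1" whose first column contains the words, whose remaining columns
# contain per-word scores, and whose last row contains the column weights.
# A minimal file could be produced like this (column names are hypothetical):
#
#   demo = pd.DataFrame({"word": ["college", "school", "WEIGHT"],
#                        "s1": [1.0, 2.0, 0.5],
#                        "s2": [3.0, 4.0, 0.5]})
#   demo.to_excel("file1/demo.xlsx", sheet_name="Sheet1", index=False)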
if __name__ == "__main__":
    os_file(filePath)

View File

@@ -0,0 +1,55 @@
import os
from random import shuffle
from train import getFeature
from drawRadar import draw
import joblib
import numpy as np
import pyaudio
import wave
path = r'wave'
wav_paths = []
person_dirs = os.listdir(path)
for person in person_dirs:
    if person.endswith('txt'):
        continue
    emotion_dir_path = os.path.join(path, person)
    emotion_dirs = os.listdir(emotion_dir_path)
    for emotion_dir in emotion_dirs:
        if emotion_dir.endswith('.ini'):
            continue
        emotion_file_path = os.path.join(emotion_dir_path, emotion_dir)
        emotion_files = os.listdir(emotion_file_path)
        for file in emotion_files:
            if not file.endswith('wav'):
                continue
            wav_path = os.path.join(emotion_file_path, file)
            wav_paths.append(wav_path)
# arrange the wav files in random order
#shuffle(wav_paths)
model = joblib.load("classfier.m")
p = pyaudio.PyAudio()
for wav_path in wav_paths:
    # play the file back while it is being classified
    f = wave.open(wav_path, 'rb')
    stream = p.open(
        format=p.get_format_from_width(f.getsampwidth()),
        channels=f.getnchannels(),
        rate=f.getframerate(),
        output=True)
    data = f.readframes(f.getnframes())
    stream.write(data)
    stream.stop_stream()
    stream.close()
    f.close()
    data_feature = getFeature(wav_path, 48)
    print(model.predict([data_feature]))
    print(model.predict_proba([data_feature]))
    labels = np.array(['angry', 'Delate', 'disgust', 'fear', 'happy',
                       'neutral', 'sad', 'surprised', 'TS'])
    draw(model.predict_proba([data_feature])[0], labels, 6)
p.terminate()
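# Note (an assumption about how the model was produced): the feature length
# passed to getFeature() here (48) has to match the mfcc_feature_num the
# loaded classifier was trained with, otherwise predict() will reject the
# vector for having the wrong number of features.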

View File

@@ -0,0 +1,263 @@
import os
import warnings
import wave
from random import shuffle

import joblib
import librosa
import logmmse
import numpy as np
import pandas as pd
import sklearn.metrics
from natsort import natsorted
from sklearn import svm

warnings.filterwarnings('ignore')
path = r'trainset/casio2'
EMOTION_LABEL = {
    'angry': '1',
    'Delate': '2',
    'disgust': '3',
    'fear': '4',
    'happy': '5',
    'neutral': '6',
    'sad': '7',
    'surprised': '8',
    'TS': '9'
}
# C: penalty parameter for the error term, i.e. how much misclassification is
#    tolerated; the larger C is, the less tolerant the model is of errors.
# gamma: with the RBF kernel, a larger gamma means fewer support vectors and
#    a smaller gamma means more support vectors.
# kernel: linear, poly, rbf, sigmoid, precomputed
# decision_function_shape: ovo, ovr (default)
'''
This module contains the feature-extraction code and the svm code.
Feature extraction depends on librosa, which keeps causing problems.
'''
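# For reference, train() below builds the classifier as
#   svm.SVC(decision_function_shape='ovo', kernel='rbf', C=C,
#           gamma=0.0003, probability=True)
# with C swept over range(13, 20) and mfcc_feature_num over range(40, 55).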
def getFeature(path, mfcc_feature_num=16):
    y, sr = librosa.load(path)
    # extract the MFCC features of each audio file
    # y: the audio time series; n_mfcc: the number of MFCCs to return
    mfcc_feature = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=16)
    zcr_feature = librosa.feature.zero_crossing_rate(y)
    energy_feature = librosa.feature.rms(y=y)  # energy is approximated by rms, so these two features are identical
    rms_feature = librosa.feature.rms(y=y)
    mfcc_feature = mfcc_feature.T.flatten()[:mfcc_feature_num]
    zcr_feature = zcr_feature.flatten()
    energy_feature = energy_feature.flatten()
    rms_feature = rms_feature.flatten()
    zcr_feature = np.array([np.mean(zcr_feature)])
    energy_feature = np.array([np.mean(energy_feature)])
    rms_feature = np.array([np.mean(rms_feature)])
    data_feature = np.concatenate((mfcc_feature, zcr_feature, energy_feature,
                                   rms_feature))
    return data_feature
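# A minimal sanity check (a sketch; "example.wav" is a hypothetical file):
#   feats = getFeature("example.wav", mfcc_feature_num=48)
#   feats.shape  # (51,): 48 truncated MFCC values + mean zcr, energy and rms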
def deNoise(path):
    f = wave.open(path, "r")
    params = f.getparams()
    nchannels, sampwidth, framerate, nframes = params[:4]
    data = f.readframes(nframes)
    f.close()
    data = np.frombuffer(data, dtype=np.short)
    # denoise
    data = logmmse.logmmse(data=data, sampling_rate=framerate)
    # save the audio
    file_save = "save" + path
    nframes = len(data)
    f = wave.open(file_save, 'w')
    f.setparams((1, 2, framerate, nframes, 'NONE', 'NONE'))  # channels, sample width, sample rate, frame count
    f.writeframes(data)
    f.close()
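# Note (an assumption about the runtime environment): file_save prefixes the
# whole input path with "save", so "trainset/casio2/a/b.wav" is written to
# "savetrainset/casio2/a/b.wav"; wave.open() does not create directories, so
# that tree has to exist before deNoise() is called.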
def getData(mfcc_feature_num=16):
    """Collect the features of every wav file in the dataset together with its emotion label."""
    wav_file_path = []
    person_dirs = os.listdir(path)
    for person in person_dirs:
        if person.endswith('txt'):
            continue
        emotion_dir_path = os.path.join(path, person)
        emotion_dirs = os.listdir(emotion_dir_path)
        for emotion_dir in emotion_dirs:
            if emotion_dir.endswith('.ini'):
                continue
            emotion_file_path = os.path.join(emotion_dir_path, emotion_dir)
            emotion_files = os.listdir(emotion_file_path)
            for file in emotion_files:
                if not file.endswith('wav'):
                    continue
                wav_path = os.path.join(emotion_file_path, file)
                wav_file_path.append(wav_path)
    # arrange the wav files in random order
    shuffle(wav_file_path)
    data_feature = []
    data_labels = []
    for wav_file in wav_file_path:
        #deNoise(wav_file)
        data_feature.append(getFeature("save" + wav_file, mfcc_feature_num))
        # the parent directory name is the emotion (assumes '/' path separators)
        data_labels.append(int(EMOTION_LABEL[wav_file.split('/')[-2]]))
    return np.array(data_feature), np.array(data_labels)
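# Note: with the deNoise() call in getData() above commented out, the function
# assumes the denoised copies under "save<path>" already exist from an
# earlier run.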
def getData1(mfcc_feature_num, path):
    """Collect the features of every wav file under `path`, in natural-sort order, without labels."""
    wav_file_path = []
    person_dirs = os.listdir(path)
    for person in person_dirs:
        if person.endswith('txt'):
            continue
        emotion_dir_path = os.path.join(path, person)
        emotion_dirs = os.listdir(emotion_dir_path)
        for emotion_dir in emotion_dirs:
            if emotion_dir.endswith('.ini'):
                continue
            emotion_file_path = os.path.join(emotion_dir_path, emotion_dir)
            emotion_files = os.listdir(emotion_file_path)
            emotion_files = natsorted(emotion_files)
            for file in emotion_files:
                if not file.endswith('wav'):
                    continue
                wav_path = os.path.join(emotion_file_path, file)
                wav_file_path.append(wav_path)
    data_feature = []
    for wav_file in wav_file_path:
        data_feature.append(getFeature(wav_file, mfcc_feature_num))
    return np.array(data_feature), wav_file_path
def train():
    # grid-search an svm classifier over C and the mfcc feature count
    best_acc = 0
    best_mfcc_feature_num = 0
    best_C = 0
    for C in range(13, 20):
        for i in range(40, 55):
            data_feature, data_labels = getData(i)
            split_num = 200  # first 200 samples train, the rest test (assumes the dataset is larger than 200)
            train_data = data_feature[:split_num, :]
            train_label = data_labels[:split_num]
            test_data = data_feature[split_num:, :]
            test_label = data_labels[split_num:]
            clf = svm.SVC(
                decision_function_shape='ovo',
                kernel='rbf',
                C=C,
                gamma=0.0003,
                probability=True)
            print("train start")
            clf.fit(train_data, train_label)
            print("train over")
            print(C, i)
            # acc_dict records one true label per predicted class; it is not used below
            acc_dict = {}
            for test_x, test_y in zip(test_data, test_label):
                pre = clf.predict([test_x])[0]
                if pre in acc_dict.keys():
                    continue
                acc_dict[pre] = test_y
            acc = sklearn.metrics.accuracy_score(
                clf.predict(test_data), test_label)
            if acc > best_acc:
                best_acc = acc
                best_C = C
                best_mfcc_feature_num = i
                print('best_acc', best_acc)
                print('best_C', best_C)
                print('best_mfcc_feature_num', best_mfcc_feature_num)
                print()
            # save the model
            joblib.dump(clf,
                        'Models/C_' + str(C) + '_mfccNum_' + str(i) + '.m')
    print('most_best_acc', best_acc)
    print('best_C', best_C)
    print('best_mfcc_feature_num', best_mfcc_feature_num)
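# Note (an assumption about the runtime environment): joblib.dump() does not
# create the 'Models/' directory, so it must exist before train() is run.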
def getData2(path):
    data_features, wavefile = getData1(52, path)
    new_svm2 = joblib.load('Models/C_16_mfccNum_52.m')  # load the model once, outside the loop
    label = []
    for data_feature in data_features:
        kk = new_svm2.predict(data_feature.reshape(1, -1))
        label.append(str(kk[0]))
    print(label)
    return label, wavefile
def run():
    paths = ["wav/1-1", "wav/1-2", "wav/1-5", "wav/1-7", "wav/1-14"]
    # invert EMOTION_LABEL so predicted digits map back to emotion names
    label_to_emotion = {v: k for k, v in EMOTION_LABEL.items()}
    for path in paths:
        label, wavefile = getData2(path)
        emotions = [label_to_emotion[labe] for labe in label]
        c = {"label": label, "wavefile": wavefile, "emotions": emotions}
        mySeries = pd.DataFrame(c)
        with pd.ExcelWriter(path + ".xlsx") as writer:  # initialize a writer
            mySeries.to_excel(writer, float_format='%.5f')  # write the table to Excel; saved when the writer closes
if __name__ == "__main__":
    train()