数据安全检索

大数据安全与隐私这门课的lab3,要求如下图:

avatar

方案:
• 使用了 pyope 库中的 OPE (Order Preserving Encryption) 对数据进行保序加密, 并将加密后的数据写入NE_encrypt.txt。
• 定义 decision_node 类来构建 KD 树。
• 实现了 travel_tree 函数，该函数能够在遍历 KD 树并进行近邻查询，并且还实现了距离计算函数 dist。

加密部分代码:

import numpy as np
from numpy import array
from pyope.ope import OPE, ValueRange

# 读入处理txt文件,返回二维数组，明文和密文
def LoadTxtMethod(filename):
    cipher = OPE(b'long key' * 2, in_range=ValueRange(0, 10000000),
                 out_range=ValueRange(0, 100000000))
    result_m = list()  # 明文点
    result_c = []
    for line in open(filename):  # 逐行打开文档.
        plain_text = line.split(' ', 1)
        plain_text[0] = float(plain_text[0])
        plain_text[1] = float(plain_text[1])
        plain_text_arr = np.array(plain_text)  # 转化数据格式
        result_m.append(plain_text_arr)  # 把第一列数据添加到result序列中
        cipher_text = []
        cipher_text.append(cipher.encrypt(int(plain_text[0] * 1000000)))
        cipher_text.append(cipher.encrypt(int(plain_text[1] * 1000000)))
        cipher_text_arr = np.array(cipher_text)  # 转化数据格式
        result_c.append(cipher_text_arr)
        print("加密" + str(cipher_text_arr[0]) + ',' + str(cipher_text_arr[1]))
    return array(result_c)


def WriteTxt(cipher_txt):
    with open('NE_encrypt.txt', 'a', encoding='utf-8') as f:
        for x in cipher_txt:
            print(type(x))
            strings = str(x[0]) + ' ' + str(x[1]) + '\n'
            f.write(strings)


if __name__ == "__main__":
    data = LoadTxtMethod('NE.txt')  # 调用上面数据处理程序
    WriteTxt(data)  # 加密之后的数据存放于cipherText.txt中

邻近搜索部分:

import numpy as np
from numpy import array
from pyope.ope import OPE, ValueRange



# 创建类，用于保存结点的值，左右子树，以及用于划分左右子树的切分轴
class decision_node:
    def __init__(self, value=None, col=None, rb=None, lb=None):
        self.value = value  # 点
        self.col = col  # 切分的维度
        self.rb = rb  # 右子树
        self.lb = lb  # 左子树


def LoadTxtMethod(filename):  # 传入形参,txt的名字.
    result = list()  # 创建要返回的数据.
    for line in open(filename):  # 逐行打开文档.
        data = line.split(' ', 1)
        data[0] = float(data[0])
        data[1] = float(data[1])
        data_float = np.array(data)
        result.append(data_float)  # 把第一列数据添加到result序列中
    return array(result)


# 切分点为坐标轴上的中值，求一个序列的中值
def median(x):
    n = len(x)
    x = list(x)
    x_order = sorted(x)
    return x_order[n // 2], x.index(x_order[n // 2])


# 以j列的中值划分数据，左小右大，j=节点深度%列数，列数这里是2
def build_tree(x, j=0):
    rb = []
    lb = []
    m, n = x.shape
    if m == 0: return None
    edge, row = median(x[:, j].copy())
    for i in range(m):
        if x[i][j] > edge:
            rb.append(i)
        if x[i][j] < edge:
            lb.append(i)
    rb_x = x[rb, :]
    lb_x = x[lb, :]
    rightBranch = build_tree(rb_x, (j + 1) % n)
    leftBranch = build_tree(lb_x, (j + 1) % n)
    return decision_node(x[row, :], j, rightBranch, leftBranch)


# 搜索树：输出目标点的近邻点
def travel_tree(node, aim):
    global pointlist  # 存储排序后的k近邻点和对应距离
    if node is None: return
    col = node.col
    if aim[col] > node.value[col]:  # 顺着树进行搜索，分类
        travel_tree(node.rb, aim)
    if aim[col] < node.value[col]:
        travel_tree(node.lb, aim)
    dis = dist(node.value, aim)
    if len(knears) < k:  # k是输入的要查询的个数，前k个点就是目标点x的k近邻
        knears.setdefault(tuple(node.value.tolist()), dis)  # 列表不能作为字典的键
        pointlist = sorted(knears.items(), key=lambda item: item[1], reverse=True)
    elif dis <= pointlist[0][1]:
        knears.setdefault(tuple(node.value.tolist()), dis)
        pointlist = sorted(knears.items(), key=lambda item: item[1], reverse=True)
    if node.rb is not None or node.lb is not None:
        # 对于父节点来说，如果目标点与其切分轴之间的距离不大于字典中各结点所对应距离的的最大值，便需要访问该父节点的另一个子节点
        if abs(aim[node.col] - node.value[node.col]) < pointlist[0][1]:
            if aim[node.col] < node.value[node.col]:
                travel_tree(node.rb, aim)
            if aim[node.col] > node.value[node.col]:
                travel_tree(node.lb, aim)
    return pointlist


def dist(x1, x2):  # 欧式距离的计算
    return ((np.array(x1) - np.array(x2)) ** 2).sum() ** 0.5

cipher = OPE(b'long key' * 2, in_range=ValueRange(0, 10000000),
                 out_range=ValueRange(0, 100000000))

# 保序加密
# 输入给的点，输出调用pyope库加密的点
def encryption(point):
    point[0] = float(point[0])
    point[1] = float(point[1])  # 把array中的点转化为float类型
    en_point = []
    en_point.append(cipher.encrypt(int(point[0] * 1000000)))
    en_point.append(cipher.encrypt(int(point[1] * 1000000)))
    en_point_arr = np.array(en_point)  # 转化数据格式

    return en_point_arr


# 保序解密
def decryption(point):
    point[0] = int(point[0])
    point[1] = int(point[1])
    de_point = []
    de_point.append(cipher.decrypt(point[0]) / 1000000)
    de_point.append(cipher.decrypt(point[1]) / 1000000)
    de_point_arr = np.array(de_point)

    return de_point_arr


if __name__ == "__main__":
    tmp = input('请输入目标点:')
    tmp = tmp.split(',')
    point_tmp = encryption(tmp)
    aim = []
    for num in point_tmp:
        aim.append(num)
    knears = {}
    k = int(input('请输入需要查询的点的个数'))
    file = 'NE_encrypt.txt'
    data_c = LoadTxtMethod(file)
    tree = build_tree(data_c)  # 构建KD tree

    pointlist = travel_tree(tree, aim)  # 对目标点进行检索
    i = 1
    for point in pointlist[-k:]:  # 里面存的有点，和欧式距离
        x = [point[0][0], point[0][1]]
        de_point = decryption(x)
        print("****************--------第" + str(i) + "条检索数据--------****************")
        print("检索结果: ", de_point)
        print("距离为:", dist(tmp, de_point))
        i = i + 1

结果

avatar