python查找重复图片并删除（图片去重）

2019-11-25 12:21:31

字体：大中小

来源：转载

供稿：网友

本文实例为大家分享了python查找重复图片并删除的具体代码，供大家参考，具体内容如下

本代码可与网络爬虫配套使用，也可单独使用。从网上爬下来的图片往往重复很多；代码能够识别尺寸不同但内容一致（或非常相近）的图片，并把重复的图片删除，只保留第一份。

# -*- coding: utf-8 -*-
"""Find near-duplicate images in a directory and delete the extras.

Every readable image is reduced to a 64-bit average hash (8x8 grayscale
thumbnail, each pixel thresholded against the thumbnail's mean).  Two images
whose hashes differ in at most ``MAX_HAMMING_DISTANCE`` bits are treated as
duplicates; the one whose file name sorts first is kept and the others are
removed from disk.

The code is written to run unchanged on both Python 2.7 and Python 3.
"""
import os
import sys    # unused here; kept from the original listing
import types  # unused here; kept from the original listing

import cv2
import numpy as np

# Thumbnail size used for hashing, as (width, height) -> 64 bits per image.
HASH_SIZE = (8, 8)
# Largest number of differing hash bits for two images to count as duplicates.
MAX_HAMMING_DISTANCE = 5


def _average_hash(image_path):
    """Return the average hash of the image at *image_path*, or None.

    The result is a flat boolean array of 64 entries.  ``None`` is returned
    when ``cv2.imread`` cannot decode the file (not an image, a directory,
    unreadable, ...), so callers can simply skip such entries.
    """
    image = cv2.imread(image_path)
    if image is None:
        return None
    thumbnail = cv2.resize(image, HASH_SIZE)
    gray = cv2.cvtColor(thumbnail, cv2.COLOR_BGR2GRAY)
    # One vectorised comparison replaces the per-pixel loop: a bit is set
    # where the pixel is at least as bright as the thumbnail's mean.
    return (gray >= cv2.mean(gray)[0]).flatten()


def _hamming_distance(bits_a, bits_b):
    """Return the number of positions at which two hashes differ."""
    return int(np.count_nonzero(bits_a != bits_b))


def cmpandremove2(path):
    """Delete near-duplicate images in *path*, hashing each file only once.

    Files are visited in sorted name order.  For every image still present,
    all later images within ``MAX_HAMMING_DISTANCE`` bits of it are deleted.
    Progress is printed as ``get <file>``, ``<name>`` and ``remove <file>``.

    Args:
        path: Directory to scan (with or without a trailing separator).

    Returns:
        None.

    Raises:
        OSError: If *path* cannot be listed or a file cannot be removed.
    """
    hashes = {}
    for name in sorted(os.listdir(path)):
        image_path = os.path.join(path, name)
        bits = _average_hash(image_path)
        if bits is None:
            continue
        print("get %s" % image_path)
        hashes[name] = bits

    names = sorted(hashes)
    removed = set()
    for position, current in enumerate(names):
        if current in removed:
            continue
        print(current)
        reference = hashes[current]
        # Only later names need checking: anything earlier that is similar
        # to `current` would already have caused `current` to be removed.
        for other in names[position + 1:]:
            if other in removed:
                continue
            if _hamming_distance(reference, hashes[other]) <= MAX_HAMMING_DISTANCE:
                removed.add(other)
                target = os.path.join(path, other)
                print("remove %s" % target)
                os.remove(target)


def cmpandremove(path):
    """Delete near-duplicate images in *path* and report whether any existed.

    Behaves like the original pairwise scan (prints each examined file,
    ``the same`` for every match, then ``remove <file>`` for every deletion)
    but computes each image's hash once up front instead of re-reading every
    file for every comparison.

    Args:
        path: Directory to scan.  Joined with ``os.path.join``, so a trailing
            separator is accepted but no longer required.

    Returns:
        1 if at least one duplicate was found and removed, otherwise 0.

    Raises:
        OSError: If *path* cannot be listed or a file cannot be removed.
    """
    names = sorted(os.listdir(path))
    hashes = dict(
        (name, _average_hash(os.path.join(path, name))) for name in names
    )

    flag = 0
    removed = set()
    for current in names:
        if current in removed:
            continue
        print(os.path.join(path, current))
        reference = hashes[current]
        if reference is None:
            # Not a decodable image: nothing to compare against.
            continue

        duplicates = []
        for other in names:
            if other == current or other in removed:
                continue
            candidate = hashes[other]
            if candidate is None:
                continue
            if _hamming_distance(reference, candidate) <= MAX_HAMMING_DISTANCE:
                print('the same')
                duplicates.append(other)
                flag = 1

        # Delete only after the scan so the output order matches the
        # original: all 'the same' lines first, then the 'remove' lines.
        for other in duplicates:
            target = os.path.join(path, other)
            print("remove %s" % target)
            os.remove(target)
            removed.add(other)
    return flag


path = 'pics/'
cmpandremove(path)

以上就是本文的全部内容，希望对大家的学习有所帮助，也希望大家多多支持武林网。

上一篇：用Anaconda安装本地python包的方法及路径问题(图文)

下一篇：python3的print()函数的用法图文讲解