SQL对重复数据的处理
程序员文章站
2022-06-02 15:54:36
...
这是常遇到的一个问题,这里的重复数据有两种:
一种是未设置主键而导致的重复,这种属于表设计缺陷,遇到的较少;
第二种就是主键不重复,其它字段重复。
一、有主键的情况
1、针对单个字段
1.1创建表,插入数据
CREATE DATABASE test;
USE test;
CREATE TABLE USER(
id BIGINT NOT NULL AUTO_INCREMENT PRIMARY KEY,
name VARCHAR(100)
) CHARSET=utf8 ENGINE=INNODB;
INSERT
INTO USER(name)
VALUES('admin'),('admin'),('ad');
1.2查询有重复数据的值,并显示重复次数
SELECT name FROM USER GROUP BY name HAVING COUNT(*)>1
SELECT COUNT(*) FROM USER GROUP BY name HAVING COUNT(*)>1;
1.3将重复的数据删除,只保留一条
DELETE FROM USER
WHERE id NOT IN
(
SELECT a.id FROM
(
SELECT MIN(id) id FROM USER GROUP BY NAME HAVING COUNT(NAME)>1
)a
)
AND NAME IN
(
SELECT b.name FROM
(
SELECT NAME FROM USER GROUP BY NAME HAVING COUNT(NAME)>1
)b
)
注意:错误写法(You can't specify target table 'user' for update in FROM clause错误,因为在mysql中不允许先select出同一表中的某些值,再update这个表(在同一语句中))
DELETE FROM USER
WHERE NAME IN
(SELECT NAME FROM USER GROUP BY NAME HAVING COUNT(NAME)>1)
AND id NOT IN
(SELECT MIN(id) FROM USER GROUP BY NAME HAVING COUNT(NAME)>1);
2、针对单个字段
2.1创建表,插入数据
CREATE DATABASE test;
USE test;
CREATE TABLE USER(
id BIGINT NOT NULL AUTO_INCREMENT PRIMARY KEY,
username VARCHAR(100),
email VARCHAR(100)
) CHARSET=utf8 ENGINE=INNODB;
INSERT
INTO USER(username, email)
VALUES('admin','e-admin'),('admin','e-admin'),('ad','e-ad');
2.2查找表中多余的重复记录(多个字段)
SELECT *
FROM USER
WHERE (username,email) IN
(SELECT username,email FROM USER GROUP BY username,email HAVING COUNT(*)>1)
2.3查找表中多余的重复记录(多个字段),不包含rowid最小的记录
SELECT *
FROM USER
WHERE (username,email) IN
(SELECT username,email FROM USER GROUP BY username,email HAVING COUNT(*)>1)
AND id NOT IN
(SELECT MIN(id) FROM USER GROUP BY username,email HAVING COUNT(*)>1)
2.4删除表中多余的重复记录(多个字段),只留有rowid最小的记录DELETE
FROM USER
WHERE id NOT IN
(
SELECT a.id FROM
(
SELECT * FROM USER GROUP BY username,email HAVING COUNT(*)>1
)a
)
2.5假删除表中多余的重复记录(多个字段),不包含rowid最小的记录UPDATE USER
SET user.show=0
WHERE id NOT IN
(
SELECT a.id FROM
(
SELECT * FROM USER GROUP BY username,email HAVING COUNT(*)>1
)a
)
二、无主键的情况
1、准备数据
1.1创建表,插入数据
CREATE DATABASE test;
USE test;
CREATE TABLE USER2(
id BIGINT NOT NULL AUTO_INCREMENT,
username VARCHAR(100),
email VARCHAR(100)
) CHARSET=utf8 ENGINE=INNODB;
INSERT
INTO USER2(id,NAME, email)
VALUES(1,'admin','e-admin'),(1,'admin','e-admin'),(2,'ad','e-ad');
SELECT * FROM user2;
1.2查询有重复数据的记录
SELECT * FROM USER2 GROUP BY id,NAME,email HAVING COUNT(*)>1;
1.3找出表中多余的重复记录(根据id判断)
SELECT *
FROM user2
WHERE id IN (
SELECT id FROM user2 GROUP BY id HAVING COUNT(id) > 1
)