Managing and Delete duplicate records from a table
----------------------------------------------------
Toad -> Schema Browser (select table right click) -> Duplicate Data (Summary-view only) -> Duplicate Data (Editable)
To select all duplicate records from a table and delete them use the queries below:
table_with_duplicates(col1,col2,col3)
To get rid of the dupes, you want to identify the rows that are identical on -all- the fields in the table. So the query to ID the dupes would be something like:
1.Use subquery to delete duplicate rows
Here we see an example of using SQL to delete duplicate table rows using an SQL subquery to identify duplicate rows, manually specifying the join columns:
select * from table1 A
WHERE a.rowid > ANY (
SELECT B.rowid
FROM table1 B
WHERE A.id1= B.id1
and A.id2=B.id2
);
OR
2. Use self-join to delete duplicate rows
The most effective way to detect duplicate rows is to join the table against itself as shown below.
select
book_unique_id,
page_seq_nbr,
image_key
from
page_image a
where
rowid >
(select min(rowid) from page_image b
where
b.key1 = a.key1
and
b.key2 = a.key2
and
b.key3 = a.key3
);
Please note that you must specify all of the columns that make the row a duplicate in the SQL where clause. Once you have detected the duplicate rows, you may modify the SQL statement to remove the duplicates as shown below:
delete from
table_name a
where
a.rowid >
any (select b.rowid
from
table_name b
where
a.col1 = b.col1
and
a.col2 = b.col2
)
;
OR
delete from
table_name a
where
a.rowid >
any (select b.rowid
from
table_name b
where
(a.col1 = b.col1 or (a.col1 is null and b.col1 is null))
and
(a.col2 = b.col2 or (a.col2 is null and b.col2 is null))
)
;
select empno
from table_with_duplicates
group by empno
having count(*) > 1
OR
select empno,count(*)
from table_with_duplicates
group by empno
having count(*) > 1
OR
SELECT col1,col2,col3,COUNT(*)
FROM table_with_duplicates
GROUP BY col1,col2,col3
HAVING COUNT(*) > 1;
OR
--show all duplicate rows
SELECT col1,col2,col3
FROM table_with_duplicates
WHERE (id) IN
(SELECT id
FROM table_with_duplicates
GROUP BY id
HAVING COUNT(*) > 1)
ORDER BY id
SELECT *
FROM TABLE A
WHERE EXISTS (
SELECT 1
FROM TABLE
WHERE COLUMN_NAME = A.COLUMN_NAME
AND ROWID < A.ROWID
)
OR
--Select the rowid's greater than the minimum rowid for each key column.
select empno
from employee a
where a.rowid >
( select min(rowid)
from employee b
where a.empno=b.empno
);
OR
SELECT *
FROM table_with_duplicates a
WHERE ROWID NOT IN
(SELECT MAX(ROWID)
FROM table_with_duplicates b
where a.ID=b.ID
);
OR
SELECT * FROM table_with_duplicates a
WHERE ROWID NOT IN
(SELECT MIN(ROWID) FROM table_with_duplicates b
GROUP BY col1,col2,col3
);
DELETE FROM table_with_duplicates
WHERE ROWID NOT IN
(SELECT MAX(ROWID) FROM table_with_duplicates
GROUP BY col1, col2, col3);
OR
--Remove duplicate line_detail_ids
DELETE from line_details
WHERE rowid NOT IN
(SELECT MAX(rowid)
FROM line_detail
GROUP BY line_detail_id)
OR
delete employee a
where a.rowid >
(select min(rowid)
from employee b
where a.empno=b.empno);
References:
http://rdbms-insight.com/toolkit/del_dupes.php
https://cooldata.wordpress.com/2014/07/20/eliminate-your-duplicate-data-row-problems-with-simple-sql/
----------------------------------------------------
Toad -> Schema Browser (select table right click) -> Duplicate Data (Summary-view only) -> Duplicate Data (Editable)
To select all duplicate records from a table and delete them use the queries below:
table_with_duplicates(col1,col2,col3)
To get rid of the dupes, you want to identify the rows that are identical on -all- the fields in the table. So the query to ID the dupes would be something like:
1.Use subquery to delete duplicate rows
Here we see an example of using SQL to delete duplicate table rows using an SQL subquery to identify duplicate rows, manually specifying the join columns:
select * from table1 A
WHERE a.rowid > ANY (
SELECT B.rowid
FROM table1 B
WHERE A.id1= B.id1
and A.id2=B.id2
);
OR
2. Use self-join to delete duplicate rows
The most effective way to detect duplicate rows is to join the table against itself as shown below.
select
book_unique_id,
page_seq_nbr,
image_key
from
page_image a
where
rowid >
(select min(rowid) from page_image b
where
b.key1 = a.key1
and
b.key2 = a.key2
and
b.key3 = a.key3
);
Please note that you must specify all of the columns that make the row a duplicate in the SQL where clause. Once you have detected the duplicate rows, you may modify the SQL statement to remove the duplicates as shown below:
delete from
table_name a
where
a.rowid >
any (select b.rowid
from
table_name b
where
a.col1 = b.col1
and
a.col2 = b.col2
)
;
OR
delete from
table_name a
where
a.rowid >
any (select b.rowid
from
table_name b
where
(a.col1 = b.col1 or (a.col1 is null and b.col1 is null))
and
(a.col2 = b.col2 or (a.col2 is null and b.col2 is null))
)
;
select empno
from table_with_duplicates
group by empno
having count(*) > 1
OR
select empno,count(*)
from table_with_duplicates
group by empno
having count(*) > 1
OR
SELECT col1,col2,col3,COUNT(*)
FROM table_with_duplicates
GROUP BY col1,col2,col3
HAVING COUNT(*) > 1;
OR
--show all duplicate rows
SELECT col1,col2,col3
FROM table_with_duplicates
WHERE (id) IN
(SELECT id
FROM table_with_duplicates
GROUP BY id
HAVING COUNT(*) > 1)
ORDER BY id
SELECT *
FROM TABLE A
WHERE EXISTS (
SELECT 1
FROM TABLE
WHERE COLUMN_NAME = A.COLUMN_NAME
AND ROWID < A.ROWID
)
OR
--Select the rowid's greater than the minimum rowid for each key column.
select empno
from employee a
where a.rowid >
( select min(rowid)
from employee b
where a.empno=b.empno
);
OR
SELECT *
FROM table_with_duplicates a
WHERE ROWID NOT IN
(SELECT MAX(ROWID)
FROM table_with_duplicates b
where a.ID=b.ID
);
OR
SELECT * FROM table_with_duplicates a
WHERE ROWID NOT IN
(SELECT MIN(ROWID) FROM table_with_duplicates b
GROUP BY col1,col2,col3
);
DELETE FROM table_with_duplicates
WHERE ROWID NOT IN
(SELECT MAX(ROWID) FROM table_with_duplicates
GROUP BY col1, col2, col3);
OR
--Remove duplicate line_detail_ids
DELETE from line_details
WHERE rowid NOT IN
(SELECT MAX(rowid)
FROM line_detail
GROUP BY line_detail_id)
OR
delete employee a
where a.rowid >
(select min(rowid)
from employee b
where a.empno=b.empno);
References:
http://rdbms-insight.com/toolkit/del_dupes.php
https://cooldata.wordpress.com/2014/07/20/eliminate-your-duplicate-data-row-problems-with-simple-sql/
No comments:
Post a Comment