1
0
mirror of https://github.com/mbirth/tcl_update_db.git synced 2024-12-26 04:24:07 +00:00
tcl_update_db/bin/clean_dupes.php

42 lines
1.1 KiB
PHP
Raw Permalink Normal View History

2017-12-16 21:15:12 +00:00
#!/usr/bin/env php
<?php
/**
 * Removes duplicate XML files from the data directory.
 *
 * Every *.xml file in ../data/ (relative to this script) is hashed with SHA-1.
 * When a file's SHA-1 equals that of a file seen earlier in the list, the MD5
 * digests of both files are compared as a second check. If those match as well,
 * the later file is deleted and the earlier one is kept.
 *
 * Progress is drawn in place on the terminal using ANSI escape sequences, and a
 * summary (processed / deleted / left) is printed at the end.
 */

$bkup_dir = __DIR__ . '/../data/';

// ANSI Control Sequence Introducer (ESC + '[').
$ansi_csi = chr(27) . '[';
// "CSI F" moves the cursor to the start of the previous line, "CSI K" erases
// to the end of that line. Together they let the next echo overwrite the
// previously printed line.
$ansi_updel = $ansi_csi . 'F' . $ansi_csi . 'K';

echo 'Reading file list ...';
$file_list = glob($bkup_dir . '*.xml');
if ($file_list === false) {
    // glob() signals an error with false; bail out instead of feeding a
    // non-array to count() and foreach.
    echo PHP_EOL;
    fwrite(STDERR, 'Could not read file list from ' . $bkup_dir . PHP_EOL);
    exit(1);
}
$num_total = count($file_list);
$num_deleted = 0;
echo 'found ' . $num_total . ' files.' . PHP_EOL;

// The extra blank line is the slot the first progress line overwrites.
echo 'Searching for duplicates ...' . PHP_EOL . PHP_EOL;

// Maps SHA-1 digest => basename of the file that was last stored for it.
$hashes = [];
foreach ($file_list as $i => $file) {
    echo $ansi_updel . ($i + 1) . '/' . $num_total . PHP_EOL;
    $filename = basename($file);

    $file_hash = sha1_file($file);
    if ($file_hash === false) {
        // An unreadable file must never be used as a key: false would be cast
        // to 0 and make every other unreadable file look like a duplicate.
        echo $ansi_updel . 'Could not read file: ' . $filename . PHP_EOL . PHP_EOL;
        continue;
    }

    if (isset($hashes[$file_hash])) {
        $old_file = $hashes[$file_hash];
        $new_md5 = md5_file($file);
        $old_md5 = md5_file($bkup_dir . $old_file);
        // Strict comparison: loose == treats digests such as "0e123..." as
        // numbers, and would also accept false == false when both reads fail.
        if ($new_md5 !== false && $new_md5 === $old_md5) {
            echo $ansi_updel . 'Duplicate file: ' . $filename . ' (first: ' . $old_file . ')' . PHP_EOL . PHP_EOL;
            if (unlink($file)) {
                $num_deleted++;
            } else {
                // Only count files that were really removed so the summary
                // below stays accurate.
                echo $ansi_updel . 'Could not delete file: ' . $filename . PHP_EOL . PHP_EOL;
            }
            continue;
        }
        echo $ansi_updel . 'Possible SHA1 collision?' . PHP_EOL . PHP_EOL;
    }

    $hashes[$file_hash] = $filename;
    flush();
}

echo $num_total . ' files processed. ' . $num_deleted . ' deleted. ' . ($num_total - $num_deleted) . ' left.' . PHP_EOL;
2018-01-05 14:14:47 +00:00