In our projects, we sometimes need to compare files and either delete the duplicates or move them to another folder. LINQ (Language Integrated Query) is a Microsoft programming model and methodology that essentially adds formal query capabilities to Microsoft .NET-based programming languages. LINQ can help us detect duplicate files and return a list of them.
|
Please take a look at the code snippet given below:
|
/// <summary>
/// Moves every duplicate "*.txt" file found in E:\Backup\Test into E:\Backup\Movedfiles,
/// leaving one copy of each distinct content in the source folder.
/// </summary>
/// <remarks>
/// The destination folder is created if it does not exist; a failure to create it propagates
/// to the caller. A single file that cannot be moved (it vanished, is locked, access is denied,
/// or a file with the same name already exists in the destination) is reported through
/// <c>System.Diagnostics.Trace</c> and skipped so the remaining duplicates are still processed.
/// </remarks>
public void RemoveDuplicateFiles()
{
    const string sourceFolder = @"E:\Backup\Test";
    const string destinationFolder = @"E:\Backup\Movedfiles";

    List<string> duplicatedFileList = GetDuplicatedFileList(sourceFolder, "*.txt");
    if (duplicatedFileList.Count == 0)
    {
        return;
    }

    // File.Move does not create missing directories, so make sure the target exists first.
    Directory.CreateDirectory(destinationFolder);

    foreach (string duplicatedFile in duplicatedFileList)
    {
        // Build the target from the file name only. A textual Replace("Test", ...) over the
        // whole path would also rewrite file names that happen to contain "Test".
        string destinationFile = Path.Combine(destinationFolder, Path.GetFileName(duplicatedFile));

        try
        {
            File.Move(duplicatedFile, destinationFile);
        }
        catch (IOException ex)
        {
            // Covers FileNotFoundException, a locked file and an already existing target.
            System.Diagnostics.Trace.TraceWarning(
                "Could not move '{0}' to '{1}': {2}", duplicatedFile, destinationFile, ex.Message);
        }
        catch (UnauthorizedAccessException ex)
        {
            System.Diagnostics.Trace.TraceWarning(
                "Could not move '{0}' to '{1}': {2}", duplicatedFile, destinationFile, ex.Message);
        }
    }
}

/// <summary>
/// Returns the files in <paramref name="folderPath"/> whose content is identical to that of
/// another file in the same folder.
/// </summary>
/// <param name="folderPath">Folder to scan (top level only).</param>
/// <param name="fileExtension">Search pattern passed to <c>Directory.GetFiles</c>, e.g. "*.txt".</param>
/// <returns>
/// For each group of files with the same content hash, every file except the first one in
/// ordinal path order. The list is empty when there are no duplicates or when the folder
/// cannot be enumerated (the failure is reported through <c>System.Diagnostics.Trace</c>).
/// </returns>
public List<string> GetDuplicatedFileList(string folderPath, string fileExtension)
{
    try
    {
        return Directory.GetFiles(folderPath, fileExtension)
            // GetFiles guarantees no order; sorting makes the kept copy deterministic.
            .OrderBy(path => path, StringComparer.Ordinal)
            .Select(path => new { FileName = path, FileHash = GetFileHash(path) })
            // An empty hash means the file could not be read. Such files must not be
            // grouped together, otherwise unrelated unreadable files look like duplicates.
            .Where(file => !string.IsNullOrEmpty(file.FileHash))
            .GroupBy(file => file.FileHash, StringComparer.Ordinal)
            .SelectMany(group => group.Skip(1).Select(file => file.FileName))
            .ToList();
    }
    catch (Exception ex)
    {
        System.Diagnostics.Trace.TraceWarning(
            "Could not scan '{0}' for duplicates: {1}", folderPath, ex.Message);
        return new List<string>();
    }
}

/// <summary>
/// Computes the SHA-1 hash of a file's content.
/// </summary>
/// <param name="fileName">Path of the file to hash.</param>
/// <returns>
/// The hash as 40 upper-case hexadecimal characters, or <c>string.Empty</c> when the file
/// cannot be opened or read (the failure is reported through <c>System.Diagnostics.Trace</c>).
/// </returns>
public string GetFileHash(string fileName)
{
    try
    {
        using (FileStream fileStream = new FileStream(fileName, FileMode.Open, FileAccess.Read))
        using (SHA1 sha1 = SHA1.Create())
        {
            // Hex-encode the digest. Decoding raw hash bytes as UTF-8 text is lossy (invalid
            // sequences collapse to U+FFFD), which can make different hashes compare equal.
            return BitConverter.ToString(sha1.ComputeHash(fileStream)).Replace("-", string.Empty);
        }
    }
    catch (Exception ex)
    {
        System.Diagnostics.Trace.TraceWarning("Could not hash '{0}': {1}", fileName, ex.Message);
        return string.Empty;
    }
}