The little things give you away... A collection of various small helper stuff
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.
 
 
 

30 lignes
1.7 KiB

  1. #!/bin/bash
  #######################################
  # Print the usage/help text and terminate the script.
  # Arguments: $1 - exit status to terminate with. Defaults to 1 (failure)
  #                 when omitted, so a call without a status can never be
  #                 mistaken for a successful run.
  # Outputs:   writes the help text to stderr; nothing goes to stdout.
  # Returns:   never returns; exits the shell with the given status.
  #######################################
  usage_exit() {
    # One printf call emits every line; the empty argument yields the blank
    # separator line between the synopsis and the description.
    printf '%s\n' \
      'Usage: dedupe FILE1 FILE2' \
      '' \
      'Prints all lines from FILE2 that do not appear in FILE1, in the order of FILE2.' \
      "WARNING: FILE1 has to be read into memory fully, and memory use scales with about a factor 40 of FILE1's size. If your files are sorted, use comm instead." \
      >&2
    # Quoted so the status is never word-split or glob-expanded.
    exit "${1:-1}"
  }
  # An explicit help request as the first argument prints the usage text and
  # exits successfully, regardless of how many arguments follow.
  case "${1-}" in
    -h | --help) usage_exit 0 ;;
  esac
  # Otherwise exactly two operands (FILE1 and FILE2) are required; any other
  # count is a usage error.
  (( $# == 2 )) || usage_exit 1
  11. # Performance and memory comparison using <(seq 1000000 2048575) <(seq 1000000 2048575) (i.e. 8 MiB of input data, all lines in both files), median of 9 runs:
  12. #
  # Implementation | User time | Sys time | Peak RSS
  #                |    [s]    |   [s]    |  [MiB]
  # ---------------|-----------|----------|---------
  # AWK            |   1.16    |   0.03   |     86.8
  # Perl           |   0.90    |   0.06   |    149.6
  # Python         |   0.58    |   0.06   |    112.6
  # grep           |   0.36    |   0.07   |    216.9
  20. #
  21. # Exact command executed for these tests, with warmup:
  22. # { for i in {0..3}; do ./dedupe <(seq 1000000 2048575) <(seq 1000000 2048575) >/dev/null; done; for i in {0..8}; do /usr/bin/time -v ./dedupe <(seq 1000000 2048575) <(seq 1000000 2048575) 2> >(grep -F -e ' time ' -e 'Maximum resident' >&2) | cat >/dev/null; done; } |& sort
  23. #awk 'NR==FNR { s[$0]=1; next; } !($0 in s)' "$1" "$2"
  24. #perl -ne 'if (@ARGV == 1) { $seen{$_}=1; } else { print $_ if !(exists $seen{$_}); }' "$1" "$2"
  #python3 -c 'import sys'$'\n''s={}'$'\n''with open(sys.argv[1], "r") as fp:'$'\n''    for line in fp:'$'\n''        s[line]=True'$'\n''with open(sys.argv[2], "r") as fp:'$'\n''    for line in fp:'$'\n''        if line not in s:'$'\n''            print(line, end="")' "$1" "$2"
  # Print every line of FILE2 ($2) that does not occur as a whole line in
  # FILE1 ($1), keeping FILE2's order:
  #   -F  treat each line of FILE1 as a fixed string, not a regular expression
  #   -x  a pattern only matches when it equals the entire line
  #   -v  invert the match: select the lines that match no pattern
  #   -f  read the patterns, one per line, from FILE1
  # `--` ends option parsing, so a FILE2 whose name starts with '-' is opened
  # as a file instead of being interpreted as a grep option.
  # NOTE: grep exits 0 when at least one line was printed, 1 when none was,
  # and 2 on error; if this is the script's final command, that status
  # becomes the script's exit status.
  grep -F -x -v -f "$1" -- "$2"