#!/usr/bin/env python3

"""
Break a large file apart into smaller files of a fixed maximum size.

The input is read as raw bytes, so the size limit is an exact byte count and
concatenating the generated files in order reproduces the original file.
"""

import argparse
import re
import time


# Power of 1024 applied for each accepted unit letter ('' and 'b' mean bytes).
_SIZE_EXPONENTS = {'': 0, 'b': 0, 'k': 1, 'm': 2, 'g': 3, 't': 4}

# A number, then an optional unit letter; "KB"/"MB"/... are accepted as well.
_SIZE_PATTERN = re.compile(r'(\d+)\s*(?:([KkMmGgTt])[Bb]?|[Bb])?')

# Upper bound on how much data is held in memory at once while copying.
_COPY_BUFFER_SIZE = 1024 * 1024


def parse_args():
    """Parse the command line and return the argparse namespace.

    The namespace carries ``large_file``, ``prefix``, ``extension`` and
    ``size`` (the size is kept as the raw string typed by the user).
    """
    argp = argparse.ArgumentParser(
        description="Will break a large file into smaller files"
    )

    argp.add_argument(
        'large_file',
        help="Full path for large file"
    )

    argp.add_argument(
        '--prefix', '-p',
        default='small',
        help="Prefix for the smaller files (default: %(default)s)"
    )

    argp.add_argument(
        '--extension', '-x',
        default='txt',
        help="Extension for the smaller files (default: %(default)s)"
    )

    argp.add_argument(
        '-s', '--size',
        default='10K',
        help="File size limits (5K, 10M, 2G) (default: %(default)s)"
    )

    return argp.parse_args()


def _parse_size(size_value):
    """Convert a size such as ``'512'``, ``'5K'`` or ``'2G'`` to bytes.

    Units are powers of 1024 and case-insensitive.

    Raises:
        ValueError: if the text is not a valid size or the size is zero.
    """
    match = _SIZE_PATTERN.fullmatch(size_value.strip())
    if match is None:
        raise ValueError(
            f"invalid size {size_value!r}; expected something like "
            "512, 5K, 10M or 2G"
        )

    number, unit = match.groups()
    size_limit = int(number) * 1024 ** _SIZE_EXPONENTS[(unit or '').lower()]
    if size_limit == 0:
        # A zero-byte limit could never make progress through the input.
        raise ValueError("size must be greater than zero")
    return size_limit


def _split_file(source, prefix, extension, size_limit):
    """Copy ``source`` into numbered files of at most ``size_limit`` bytes.

    ``source`` is a binary file object. Output files are named
    ``{prefix}-{n}.{extension}`` with ``n`` starting at 1. Returns the number
    of files written.
    """
    file_count = 0

    # Read the first piece before opening an output file so that no empty
    # trailing file is created when the input ends on a chunk boundary.
    while piece := source.read(min(_COPY_BUFFER_SIZE, size_limit)):
        file_count += 1
        output_filename = f"{prefix}-{file_count}.{extension}"
        remaining = size_limit

        with open(output_filename, 'wb') as small_file:
            while piece:
                small_file.write(piece)
                remaining -= len(piece)
                if not remaining:
                    break
                # Never request more than what is left for this output file.
                piece = source.read(min(_COPY_BUFFER_SIZE, remaining))

    return file_count


def main():
    """Split the file named on the command line into size-limited pieces.

    Exits with an error message if the requested size cannot be parsed.
    """
    args = parse_args()

    try:
        size_limit = _parse_size(args.size)
    except ValueError as err:
        raise SystemExit(f"quick-chunk: {err}") from None

    # Binary mode: the limit is in bytes and the data is copied untouched.
    with open(args.large_file, 'rb') as large_file:
        _split_file(large_file, args.prefix, args.extension, size_limit)


if __name__ == "__main__":
    main()