#!/usr/bin/env python3

"""
Break a large file apart into a series of smaller, size-limited files,
splitting only on line boundaries.
"""

import argparse
import re
import os
import glob


def parse_args():
    """Define the command-line interface and parse ``sys.argv``.

    Returns:
        argparse.Namespace: carries ``large_file`` (positional), the string
        options ``prefix``, ``extension`` and ``size``, and the boolean
        flag ``old``.
    """
    parser = argparse.ArgumentParser(
        description="Will break a large file into smaller files",
    )

    # The only required argument: the file that is going to be split.
    parser.add_argument('large_file', help="Full path for large file")

    # Plain string options share the same shape, so declare them as rows of
    # (flags, default, help text) and register them in order.
    string_options = (
        (
            ('--prefix', '-p'),
            'small',
            "Prefix for the smaller files (default: %(default)s)",
        ),
        (
            ('--extension', '-x'),
            'txt',
            "Extension for the smaller files (default: %(default)s)",
        ),
        (
            ('--size', '-s'),
            '10K',
            "File size limits (5K, 10M, 2G)",
        ),
    )
    for flags, default_value, help_text in string_options:
        parser.add_argument(*flags, default=default_value, help=help_text)

    parser.add_argument(
        '--old', '-o',
        action='store_true',
        help="Reuse existing small files; default behavior is to remove existing files for new ones",
    )

    # Future functionality: a '--count' / '-c' integer option (default 1)
    # giving the number of files to generate.

    return parser.parse_args()


# Power of 1024 applied for each recognised size-unit letter.
_SIZE_EXPONENTS = {'k': 1, 'm': 2, 'g': 3, 't': 4}


def _parse_size(size_value):
    """Convert a size string such as ``512``, ``10K`` or ``2G`` into bytes.

    Accepted forms are a run of digits optionally followed by ``B`` (bytes)
    or by one of ``K``/``M``/``G``/``T`` (powers of 1024), in either case,
    with an optional trailing ``B`` after the unit letter (``10KB``).

    Args:
        size_value: The size string as given on the command line.

    Returns:
        The size as an integer number of bytes.

    Raises:
        ValueError: If ``size_value`` does not have one of the accepted forms.
    """
    match = re.fullmatch(
        r'(\d+)(?:([KkMmGgTt])[Bb]?|[Bb])?', size_value.strip()
    )
    if match is None:
        raise ValueError(
            f"invalid size {size_value!r}; expected forms like 500, 5K, 10M, 2G"
        )

    digits, unit = match.groups()
    # No unit letter (or a bare 'B') means the number is already in bytes.
    exponent = _SIZE_EXPONENTS[unit.lower()] if unit else 0
    return int(digits) * 1024 ** exponent


def _split_file(source_path, prefix, extension, size_limit, reuse):
    """Copy ``source_path`` line by line into numbered chunk files.

    Chunk files are named ``{prefix}-{number:05d}.{extension}`` with numbers
    starting at 1. A new chunk is started *before* writing a line that would
    push the current chunk past ``size_limit``, so a chunk only exceeds the
    limit when a single line is itself longer than the limit. Data is copied
    in binary mode so bytes and line endings are preserved exactly.

    Args:
        source_path: Path of the file to split.
        prefix: Leading part of every chunk file name.
        extension: File extension (without the dot) of every chunk file.
        size_limit: Maximum chunk size in bytes.
        reuse: When true, existing chunk files are appended to (and skipped
            once they have no room left); when false, they are overwritten.

    Returns:
        The number of the last chunk file used, or 0 if the source is empty.
    """
    mode = 'ab' if reuse else 'wb'
    file_count = 0
    chunk = None
    chunk_size = 0

    try:
        with open(source_path, 'rb') as large_file:
            for line in large_file:
                # Advance until we hold a chunk that can take this line. An
                # empty chunk always accepts the line, so an oversized line
                # still lands somewhere instead of looping forever.
                while chunk is None or (
                    chunk_size and chunk_size + len(line) > size_limit
                ):
                    if chunk is not None:
                        chunk.close()
                    file_count += 1
                    chunk = open(
                        f"{prefix}-{file_count:05d}.{extension}", mode
                    )
                    # Non-zero only when appending to a pre-existing file.
                    chunk_size = os.fstat(chunk.fileno()).st_size

                chunk.write(line)
                chunk_size += len(line)
    finally:
        if chunk is not None:
            chunk.close()

    return file_count


def main():
    """Split the file named on the command line into size-limited chunks.

    Reads the options from ``parse_args()``, converts ``--size`` to bytes,
    removes earlier chunk files unless ``--old`` was given, and then writes
    the input out as numbered chunk files, reporting progress on stdout.

    Raises:
        SystemExit: If the ``--size`` value cannot be understood.
    """
    args = parse_args()

    # get the chunk file size (validated before anything is deleted)
    try:
        size_limit = _parse_size(args.size)
    except ValueError as err:
        raise SystemExit(f"error: {err}") from None

    if not args.old:
        print("Removing previous files...")
        for previous_file in glob.glob(f"{args.prefix}-*.{args.extension}"):
            os.remove(previous_file)

    print(f"Begin chunking large file {args.large_file}...")
    file_count = _split_file(
        args.large_file, args.prefix, args.extension, size_limit, args.old
    )

    print(f"Large file {args.large_file} chunked into {file_count} files no bigger than {args.size}")


# Script entry point: run the splitter only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()