#!/usr/bin/env python3
import json
import random
import re

import alive_progress
from mock import Mock
import requests

from maigret.maigret import *
from maigret.result import MaigretCheckStatus
from maigret.sites import MaigretSite

URL_RE = re.compile(r"https?://(www\.)?")
TIMEOUT = 200


async def maigret_check(site, site_data, username, status, logger):
    query_notify = Mock()
    logger.debug(f'Checking {site}...')

    for username, status in [(username, status)]:
        results = await maigret(
            username,
            {site: site_data},
            logger,
            query_notify,
            timeout=TIMEOUT,
            forced=True,
            no_progressbar=True,
        )

        if results[site]['status'].status != status:
            if results[site]['status'].status == MaigretCheckStatus.UNKNOWN:
                msg = site_data.absence_strs
                etype = site_data.check_type
                context = results[site]['status'].context

                logger.debug(f'Error while searching {username} in {site}, must be claimed. Context: {context}')
                # if site_data.get('errors'):
                #     continue
                return False

            if status == MaigretCheckStatus.CLAIMED:
                logger.debug(f'Not found {username} in {site}, must be claimed')
                logger.debug(results[site])
                pass
            else:
                logger.debug(f'Found {username} in {site}, must be available')
                logger.debug(results[site])
                pass
            return False

    return site_data


async def check_and_add_maigret_site(site_data, semaphore, logger, ok_usernames, bad_usernames):
    async with semaphore:
        sitename = site_data.name
        positive = False
        negative = False

        for ok_username in ok_usernames:
            site_data.username_claimed = ok_username
            status = MaigretCheckStatus.CLAIMED
            if await maigret_check(sitename, site_data, ok_username, status, logger):
                # print(f'{sitename} positive case is okay')
                positive = True
                break

        for bad_username in bad_usernames:
            site_data.username_unclaimed = bad_username
            status = MaigretCheckStatus.AVAILABLE
            if await maigret_check(sitename, site_data, bad_username, status, logger):
                # print(f'{sitename} negative case is okay')
                negative = True
                break

        if positive and negative:
            site_data = site_data.strip_engine_data()

            db.update_site(site_data)
            print(site_data.json)
            try:
                db.save_to_file(args.base_file)
            except Exception as e:
                logging.error(e, exc_info=True)
            print(f'Saved new site {sitename}...')
            ok_sites.append(site_data)


if __name__ == '__main__':
    parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter
                            )
    parser.add_argument("--base", "-b", metavar="BASE_FILE",
                        dest="base_file", default="maigret/resources/data.json",
                        help="JSON file with sites data to update.")

    parser.add_argument("--add-engine", dest="add_engine", help="Additional engine to check")

    parser.add_argument("--only-engine", dest="only_engine", help="Use only this engine from detected to check")

    parser.add_argument('--check', help='only check sites in database', action='store_true')

    parser.add_argument('--random', help='shuffle list of urls', action='store_true', default=False)

    parser.add_argument('--top', help='top count of records in file', type=int, default=10000)

    parser.add_argument('--filter', help='substring to filter input urls', type=str, default='')

    parser.add_argument('--username', help='preferable username to check with', type=str)

    parser.add_argument(
        "--info",
        "-vv",
        action="store_true",
        dest="info",
        default=False,
        help="Display service information.",
    )
    parser.add_argument(
        "--verbose",
        "-v",
        action="store_true",
        dest="verbose",
        default=False,
        help="Display extra information and metrics.",
    )
    parser.add_argument(
        "-d",
        "--debug",
        "-vvv",
        action="store_true",
        dest="debug",
        default=False,
        help="Saving debugging information and sites responses in debug.txt.",
    )

    parser.add_argument("urls_file",
                        metavar='URLS_FILE',
                        action="store",
                        help="File with base site URLs"
                        )

    args = parser.parse_args()

    log_level = logging.ERROR
    if args.debug:
        log_level = logging.DEBUG
    elif args.info:
        log_level = logging.INFO
    elif args.verbose:
        log_level = logging.WARNING

    logging.basicConfig(
        format='[%(filename)s:%(lineno)d] %(levelname)-3s  %(asctime)s %(message)s',
        datefmt='%H:%M:%S',
        level=log_level
    )
    logger = logging.getLogger('engines-check')
    logger.setLevel(log_level)

    db = MaigretDatabase()
    sites_subset = db.load_from_file(args.base_file).sites
    sites = {site.name: site for site in sites_subset}
    engines = db.engines

    # TODO: usernames extractors
    ok_usernames = ['alex', 'god', 'admin', 'red', 'blue', 'john']
    if args.username:
        ok_usernames = [args.username] + ok_usernames

    bad_usernames = ['noonewouldeverusethis7']

    with open(args.urls_file, 'r') as urls_file:
        urls = urls_file.read().splitlines()
        if args.random:
            random.shuffle(urls)
        urls = urls[:args.top]

    raw_maigret_data = json.dumps({site.name: site.json for site in sites_subset})

    new_sites = []
    for site in alive_progress.alive_it(urls):
        site_lowercase = site.lower()

        domain_raw = URL_RE.sub('', site_lowercase).strip().strip('/')
        domain_raw = domain_raw.split('/')[0]

        if args.filter and args.filter not in domain_raw:
            logger.debug('Site %s skipped due to filtering by "%s"', domain_raw, args.filter)
            continue

        if domain_raw in raw_maigret_data:
            logger.debug(f'Site {domain_raw} already exists in the Maigret database!')
            continue

        if '"' in domain_raw:
            logger.debug(f'Invalid site {domain_raw}')
            continue

        main_page_url = '/'.join(site.split('/', 3)[:3])

        site_data = {
            'url': site,
            'urlMain': main_page_url,
            'name': domain_raw,
        }

        try:
            r = requests.get(main_page_url, timeout=5)
        except:
            r = None
            pass

        detected_engines = []

        for e in engines:
            strs_to_check = e.__dict__.get('presenseStrs')
            if strs_to_check and r and r.text:
                all_strs_in_response = True
                for s in strs_to_check:
                    if not s in r.text:
                        all_strs_in_response = False
                if all_strs_in_response:
                    engine_name = e.__dict__.get('name')
                    detected_engines.append(engine_name)
                    logger.info(f'Detected engine {engine_name} for site {main_page_url}')

        if args.only_engine and args.only_engine in detected_engines:
            detected_engines = [args.only_engine]
        elif not detected_engines and args.add_engine:
            logging.debug('Could not detect any engine, applying default engine %s...', args.add_engine)
            detected_engines = [args.add_engine]

        def create_site_from_engine(sitename, data, e):
            site = MaigretSite(sitename, data)
            site.update_from_engine(db.engines_dict[e])
            site.engine = e
            return site

        for engine_name in detected_engines:
            site = create_site_from_engine(domain_raw, site_data, engine_name)
            new_sites.append(site)
            logger.debug(site.json)

            # if engine_name == "phpBB":
            #     site_data_with_subpath = dict(site_data)
            #     site_data_with_subpath["urlSubpath"] = "/forum"
            #     site = create_site_from_engine(domain_raw, site_data_with_subpath, engine_name)
            #     new_sites.append(site)

        # except Exception as e:
        #     print(f'Error: {str(e)}')
        #     pass

    print(f'Found {len(new_sites)}/{len(urls)} new sites')

    if args.check:
        for s in new_sites:
            print(s.url_main)
        sys.exit(0)

    sem = asyncio.Semaphore(20)
    loop = asyncio.get_event_loop()

    ok_sites = []
    tasks = []
    for site in new_sites:
        check_coro = check_and_add_maigret_site(site, sem, logger, ok_usernames, bad_usernames)
        future = asyncio.ensure_future(check_coro)
        tasks.append(future)

    with alive_progress(len(tasks), title='Checking sites') as progress:
        for f in asyncio.as_completed(tasks):
            progress()
        try:
            loop.run_until_complete(f)
        except asyncio.exceptions.TimeoutError:
            pass

    print(f'Found and saved {len(ok_sites)} sites!')
