# B42/at_django_boilerplate/utils/company_matching.py

from rapidfuzz import fuzz
from company_search.models import CompanySearch

from django.views.generic import View
from django.http import JsonResponse

from django.conf import settings
import random

# Generic corporate words that say nothing about which company is meant;
# clean_text() drops them before names are compared.
CORPORATE_STOPWORDS = {
    "private", "pvt", "limited", "ltd", "llp", "company",
    "corp", "corporation", "india", "enterprises", "enterprise",
    "solutions", "product", "products"
}

# Punctuation that is routinely glued to those words in real names
# ("Pvt.", "Ltd.,", "(India)"). It is ignored when testing for a stopword.
_EDGE_PUNCTUATION = ".,;:()"


def clean_text(text: str) -> str:
    """Lower-case ``text`` and drop common corporate words.

    A word counts as a stopword even when it carries leading/trailing
    punctuation, so ``"Pvt."`` and ``"(India)"`` are removed just like
    ``"pvt"`` and ``"india"``. Words that are kept are returned exactly as
    they appeared (lower-cased, punctuation intact).

    Args:
        text: Raw company name or query. Falsy values (``None``, ``""``)
            are accepted.

    Returns:
        The remaining words joined by single spaces, or ``""`` when the
        input is falsy or consists only of stopwords.
    """
    if not text:
        return ""
    kept = [
        word
        for word in text.lower().split()
        # Compare the bare word so "ltd." / "ltd," match the "ltd" stopword.
        if word.strip(_EDGE_PUNCTUATION) not in CORPORATE_STOPWORDS
    ]
    return " ".join(kept)


def normalize_query(query: str) -> str:
    """Return the trimmed, lower-cased query, appending ' limited' if needed.

    The suffix is added only when none of the markers 'limited', 'ltd' or
    'pvt' occurs anywhere in the lower-cased query (plain substring test).
    """
    lowered = query.strip().lower()
    legal_markers = ("limited", "ltd", "pvt")
    if any(marker in lowered for marker in legal_markers):
        return lowered
    return f"{lowered} limited"


# Default similarity cutoff used by the matching helpers in this module.
# Read once at import time from the COMPANY_MATCHING_MIN_SCORE Django
# setting; falls back to 50 when the setting is not defined.
MIN_SCORE = getattr(settings, "COMPANY_MATCHING_MIN_SCORE", 50)

def find_match(query: str, min_score: int = None):
    """
    Score ``query`` against every ``CompanySearch`` row and report the best.

    The query is passed through ``normalize_query`` and ``clean_text``;
    each row's ``company_name`` (``None`` treated as ``""``) is passed
    through ``clean_text``. The two are compared with
    ``fuzz.partial_ratio``. The highest-scoring row wins; on a tie the row
    encountered first is kept.

    Args:
        query: Proposed company name.
        min_score: Similarity cutoff. ``None`` means "use the module-level
            ``MIN_SCORE``".

    Returns:
        ``(None, best_score)`` when ``best_score >= min_score`` (the name
        is considered too similar to an existing one), otherwise
        ``(best_company, best_score)``. ``best_company`` is also ``None``
        when no row scored above zero.
    """
    if min_score is None:
        min_score = MIN_SCORE

    # Normalize and clean the query once, outside the scan.
    q_clean = clean_text(normalize_query(query))

    # partial_ratio is documented to return a value between 0 and 100.
    perfect_score = 100

    best_company = None
    best_score = 0

    for company in CompanySearch.objects.all():
        name_clean = clean_text(company.company_name or "")
        score = fuzz.partial_ratio(q_clean, name_clean)

        # Strict ">" keeps the first row seen on ties.
        if score > best_score:
            best_company = company
            best_score = score
            # Nothing can beat a perfect score, and later ties are ignored,
            # so the rest of the table cannot change the result.
            if best_score >= perfect_score:
                break

    # Reject if similarity too high
    if best_score >= min_score:
        return None, best_score

    return best_company, best_score


def find_company_match(query: str, min_score: int = None):
    """
    Delegate to ``find_match``, resolving a missing cutoff to ``MIN_SCORE``.

    Returns:
        Whatever ``find_match`` returns: ``(best_company_object, score)``,
        or ``(None, score)`` when the score reaches the cutoff.
    """
    cutoff = MIN_SCORE if min_score is None else min_score
    return find_match(query, cutoff)


class CheckCompanyNameAvailability(View):
    """JSON endpoint answering whether a requested company name is taken.

    Responds with ``{"is_taken": <bool>}``. A name counts as taken when a
    ``CompanySearch`` row matches it case-insensitively, or when
    ``find_match`` reports a score at or above ``MIN_SCORE``.
    """

    # Cutoff handed to find_match; defaults to the module-level MIN_SCORE.
    MIN_SCORE = MIN_SCORE

    def get(self, request):
        """Read ``company_name`` from the query string and report its status."""
        requested = request.GET.get('company_name', None)

        # Missing or empty parameter: nothing to check.
        if not requested:
            return JsonResponse({'is_taken': False})

        # Cheap exact (case-insensitive) lookup before the fuzzy scan.
        if CompanySearch.objects.filter(company_name__iexact=requested).exists():
            return JsonResponse({'is_taken': True})

        match, score = find_match(requested, min_score=self.MIN_SCORE)
        if match is None and score >= self.MIN_SCORE:
            return JsonResponse({'is_taken': True})

        return JsonResponse({'is_taken': False})


def generate_suggestions(original_name, num_suggestions=4):
    """
    Generate varied company name suggestions derived from ``original_name``.

    The name is trimmed and upper-cased, at most one trailing legal or
    industry suffix is removed to obtain a base name, and the base is then
    combined with randomly chosen prefixes, suffixes, domain words, nearby
    years, and "premium"/"trendy" words. Every suggestion ends with
    "PRIVATE LIMITED".

    Args:
        original_name: The name the user asked for. Falsy or
            whitespace-only values produce no suggestions.
        num_suggestions: Maximum number of suggestions to return; values
            below zero are treated as zero.

    Returns:
        A list of at most ``num_suggestions`` unique suggestion strings in
        random order.
    """
    # Local import keeps this function self-contained.
    from datetime import date

    if not original_name:
        return []

    # Clean the original name
    name = original_name.strip().upper()
    if not name:
        return []

    # Common Indian company suffixes to remove for a better base name.
    # Only the first match is removed, so a longer suffix must be listed
    # before any shorter suffix it ends with.
    removable_suffixes = (
        'INDIA PRIVATE LIMITED', 'PRIVATE LIMITED', 'PVT LTD', 'LTD',
        'LIMITED', 'LLP', 'OPC', 'TECHNOLOGIES', 'SOLUTIONS', 'VENTURES',
        'ENTERPRISES', 'SYSTEMS', 'LABS', 'DIGITAL', 'TECH',
    )

    base_name = name
    for suffix in removable_suffixes:
        if base_name.endswith(suffix):
            # Slice off the trailing occurrence only; str.replace would
            # also delete the same text earlier in the name.
            base_name = base_name[:-len(suffix)].strip()
            break

    # A name that is nothing but a suffix (e.g. "TECH") would leave an
    # empty base; keep the full name so suggestions stay meaningful.
    if not base_name:
        base_name = name

    # Split into words for smarter mixing
    words = [word for word in base_name.split() if len(word) > 2]
    if not words:
        words = [base_name[:10]]  # fallback

    # Dynamic prefix/suffix lists (more variety, India-relevant)
    prefixes = ['Nex', 'Pro', 'Smart', 'Prime', 'Elite', 'Global', 'Alpha', 'Beta', 'Core', 'Zen', 'Apex', 'Nova', 'Viva', 'Omni', 'Eco', 'True', 'Pure', 'First', 'Best', 'Ultra', 'Mega', 'Neo']
    name_suffixes = ['Hub', 'Labs', 'Works', 'Craft', 'Forge', 'Nest', 'Space', 'Grid', 'Link', 'Wave', 'Spark', 'Pulse', 'Flow', 'Edge', 'Peak', 'Rise', 'Shift', 'Bridge', 'Path', 'Root', 'Source']
    domains = ['Tech', 'Digital', 'Info', 'Net', 'Web', 'Cloud', 'Data', 'Soft', 'Systems', 'Networks', 'Media', 'Vision', 'Horizon', 'Future']

    suggestions = set()  # Use set to avoid duplicates

    # 1. Smart combinations around one randomly chosen word of the base
    main_word = random.choice(words)
    suggestions.add(f"{main_word} {random.choice(name_suffixes)} PRIVATE LIMITED")
    suggestions.add(f"{random.choice(prefixes)}{main_word} PRIVATE LIMITED")
    suggestions.add(f"{main_word}{random.choice(domains)} PRIVATE LIMITED")

    # 2. Year variations: this year, next year, last year
    current_year = date.today().year
    for year in (current_year, current_year + 1, current_year - 1):
        suggestions.add(f"{base_name} {year} PRIVATE LIMITED")

    # 3. Location-inspired (if name has city/state hint — optional future enhancement)
    # You can expand this later with user location

    # 4. Premium feel
    premium = ['Capital', 'Holdings', 'Group', 'Industries', 'Corporation']
    suggestions.add(f"{base_name} {random.choice(premium)} PRIVATE LIMITED")

    # 5. Modern trendy words
    trendy = ['Byte', 'Pixel', 'Quantum', 'Fusion', 'Synergy', 'Nexus', 'Vertex', 'Axis', 'Orbit']
    suggestions.add(f"{random.choice(trendy)} {base_name} PRIVATE LIMITED")

    # Convert to list and shuffle for variety
    suggestion_list = list(suggestions)
    random.shuffle(suggestion_list)

    # Drop stray whitespace and any entry that is blank after trimming
    final_suggestions = [s.strip() for s in suggestion_list if s.strip()]

    # Clamp so a negative limit cannot slice from the end of the list.
    return final_suggestions[:max(num_suggestions, 0)]