Files
B42/at_django_boilerplate/utils/company_matching.py
2026-01-07 12:09:20 +05:30

175 lines
5.7 KiB
Python
Executable File

from rapidfuzz import fuzz
from company_search.models import CompanySearch
from django.views.generic import View
from django.http import JsonResponse
from django.conf import settings
import random
# Generic corporate words that carry no identifying information; they are
# dropped from names before fuzzy comparison.
CORPORATE_STOPWORDS = {
    "private", "pvt", "limited", "ltd", "llp", "company",
    "corp", "corporation", "india", "enterprises", "enterprise",
    "solutions", "product", "products",
}

# Punctuation ignored at the edges of a token when testing it against
# CORPORATE_STOPWORDS, so that "Pvt.", "Ltd." and "(India)" are recognised.
_TOKEN_EDGE_PUNCTUATION = ".,;:()[]"


def clean_text(text: str) -> str:
    """Return ``text`` lower-cased with common corporate words removed.

    The text is split on whitespace. A token is dropped when, after
    ignoring punctuation at its edges, it is one of CORPORATE_STOPWORDS.
    Tokens that are kept are emitted unchanged (punctuation included) and
    re-joined with single spaces.

    Args:
        text: Company name or query; ``None`` and ``""`` are accepted.

    Returns:
        The cleaned, lower-cased string, or ``""`` for falsy input.
    """
    if not text:
        return ""
    kept = [
        token
        for token in text.lower().split()
        # Compare the bare word so abbreviations written with a trailing
        # dot or wrapped in brackets are still treated as stopwords.
        if token.strip(_TOKEN_EDGE_PUNCTUATION) not in CORPORATE_STOPWORDS
    ]
    return " ".join(kept)
def normalize_query(query: str) -> str:
    """Lower-case and trim ``query``, appending ' limited' when it names no legal form.

    The query is left as-is (apart from trimming and lower-casing) when it
    already contains the substring "limited", "ltd" or "pvt".
    """
    lowered = query.strip().lower()
    has_legal_marker = any(
        marker in lowered for marker in ("limited", "ltd", "pvt")
    )
    return lowered if has_legal_marker else lowered + " limited"
# Default similarity cutoff used by the matching helpers below. Read from the
# COMPANY_MATCHING_MIN_SCORE Django setting, falling back to 50 when unset.
MIN_SCORE = getattr(settings, "COMPANY_MATCHING_MIN_SCORE", 50)
def find_match(query: str, min_score: int = None):
    """
    Score ``query`` against every CompanySearch name and report the best hit.

    Both the query (after ``normalize_query``) and each stored name are
    passed through ``clean_text`` and compared with
    ``fuzz.partial_ratio``. The first company reaching the highest score is
    remembered.

    Args:
        query: Proposed company name.
        min_score: Similarity cutoff; defaults to module-level MIN_SCORE.

    Returns:
        ``(None, best_score)`` when ``best_score >= min_score`` - the query
        is considered too similar to an existing name.
        Otherwise ``(best_company, best_score)``, where ``best_company`` is
        the closest company below the cutoff, or ``None`` if no name scored
        above 0.
    """
    if min_score is None:
        min_score = MIN_SCORE

    # Normalize and clean the query once, outside the loop.
    q_clean = clean_text(normalize_query(query))

    best_company = None
    best_score = 0
    # iterator() streams rows rather than caching the entire table on the
    # queryset; the rows are only needed for this single pass.
    for company in CompanySearch.objects.all().iterator():
        name_clean = clean_text(company.company_name or "")
        score = fuzz.partial_ratio(q_clean, name_clean)
        # Strict '>' keeps the first company that reaches the top score.
        if score > best_score:
            best_company = company
            best_score = score
            # partial_ratio tops out at 100, so nothing later can win.
            if best_score >= 100:
                break

    # A score at or above the cutoff means "too similar": signal with None.
    if best_score >= min_score:
        return None, best_score
    return best_company, best_score
def find_company_match(query: str, min_score: int = None):
    """
    Delegate to ``find_match``, substituting MIN_SCORE when no cutoff is given.

    Returns:
        Whatever ``find_match`` returns: ``(best_company_object, score)``,
        or ``(None, score)`` when the score reaches the cutoff (too similar).
    """
    cutoff = MIN_SCORE if min_score is None else min_score
    return find_match(query, cutoff)
class CheckCompanyNameAvailability(View):
    """Report as JSON whether a requested company name is already taken."""

    # Similarity cutoff; defaults to the module-level MIN_SCORE.
    MIN_SCORE = MIN_SCORE

    def get(self, request):
        """Respond with ``{'is_taken': bool}`` for the ``company_name`` GET parameter.

        A missing or empty name is reported as not taken. A
        case-insensitive exact match is taken; otherwise the name is taken
        when ``find_match`` rejects it as too similar to an existing one.
        """
        requested = request.GET.get('company_name', None)
        if not requested:
            return JsonResponse({'is_taken': False})

        exact_hits = CompanySearch.objects.filter(company_name__iexact=requested)
        if exact_hits.exists():
            return JsonResponse({'is_taken': True})

        match, score = find_match(requested, min_score=self.MIN_SCORE)
        too_similar = match is None and score >= self.MIN_SCORE
        return JsonResponse({'is_taken': too_similar})
# Trailing words stripped from a name to obtain its distinctive base.
_BASE_NAME_SUFFIXES = (
    'INDIA PRIVATE LIMITED', 'PRIVATE LIMITED', 'PVT LTD', 'LIMITED', 'LTD',
    'LLP', 'OPC', 'TECHNOLOGIES', 'SOLUTIONS', 'VENTURES', 'ENTERPRISES',
    'SYSTEMS', 'LABS', 'DIGITAL', 'TECH',
)


def _strip_trailing_suffix(name):
    """Return ``name`` minus at most one trailing whole-word suffix."""
    # Longest first, so 'INDIA PRIVATE LIMITED' wins over 'PRIVATE LIMITED'.
    for suffix in sorted(_BASE_NAME_SUFFIXES, key=len, reverse=True):
        # The leading space enforces a word boundary ("BIOTECH" keeps its
        # "TECH") and slicing removes only the trailing occurrence.
        if name.endswith(' ' + suffix):
            return name[:-len(suffix)].rstrip()
    return name


def generate_suggestions(original_name, num_suggestions=4):
    """
    Generate varied company-name suggestions derived from ``original_name``.

    The name is upper-cased, whitespace-normalised and stripped of one
    trailing corporate suffix to get a base name. Suggestions combine the
    base (or one of its words longer than two characters) with randomly
    chosen prefixes, suffixes, domain words, nearby years, and "premium" /
    "trendy" words. Every suggestion ends with 'PRIVATE LIMITED'.

    Args:
        original_name: Name the user asked for; falsy or blank gives ``[]``.
        num_suggestions: Maximum number of suggestions to return. ``None``
            returns all of them; zero or a negative number returns ``[]``.

    Returns:
        A shuffled list of unique suggestion strings, at most
        ``num_suggestions`` long. Output is random between calls.
    """
    from datetime import date

    if not original_name:
        return []
    if num_suggestions is not None and num_suggestions <= 0:
        return []

    # Upper-case and collapse runs of whitespace so suffix matching is stable.
    name = ' '.join(original_name.upper().split())
    if not name:
        return []

    base_name = _strip_trailing_suffix(name)

    # Words worth mixing; fall back to a prefix of the base if all are short.
    words = [word for word in base_name.split() if len(word) > 2]
    if not words:
        words = [base_name[:10].strip()]

    prefixes = ['Nex', 'Pro', 'Smart', 'Prime', 'Elite', 'Global', 'Alpha', 'Beta', 'Core', 'Zen', 'Apex', 'Nova', 'Viva', 'Omni', 'Eco', 'True', 'Pure', 'First', 'Best', 'Ultra', 'Mega', 'Neo']
    name_suffixes = ['Hub', 'Labs', 'Works', 'Craft', 'Forge', 'Nest', 'Space', 'Grid', 'Link', 'Wave', 'Spark', 'Pulse', 'Flow', 'Edge', 'Peak', 'Rise', 'Shift', 'Bridge', 'Path', 'Root', 'Source']
    domains = ['Tech', 'Digital', 'Info', 'Net', 'Web', 'Cloud', 'Data', 'Soft', 'Systems', 'Networks', 'Media', 'Vision', 'Horizon', 'Future']
    premium = ['Capital', 'Holdings', 'Group', 'Industries', 'Corporation']
    trendy = ['Byte', 'Pixel', 'Quantum', 'Fusion', 'Synergy', 'Nexus', 'Vertex', 'Axis', 'Orbit']

    suggestions = set()  # a set drops accidental duplicates

    # 1. Combinations built around one randomly chosen word of the base.
    main_word = random.choice(words)
    suggestions.add(f"{main_word} {random.choice(name_suffixes)} PRIVATE LIMITED")
    suggestions.add(f"{random.choice(prefixes)}{main_word} PRIVATE LIMITED")
    suggestions.add(f"{main_word}{random.choice(domains)} PRIVATE LIMITED")

    # 2. Year variations around the current calendar year.
    current_year = date.today().year
    for year in (current_year, current_year + 1, current_year - 1):
        suggestions.add(f"{base_name} {year} PRIVATE LIMITED")

    # 3. "Premium" and "trendy" variants of the full base name.
    suggestions.add(f"{base_name} {random.choice(premium)} PRIVATE LIMITED")
    suggestions.add(f"{random.choice(trendy)} {base_name} PRIVATE LIMITED")

    # Shuffle so the truncated result is not always the same pattern mix.
    suggestion_list = list(suggestions)
    random.shuffle(suggestion_list)
    return suggestion_list[:num_suggestions]