using System.Text.RegularExpressions;
using System.Xml;
using LLM.HHData.Config;
using LLM.HHData.Http;
using Microsoft.Extensions.Options;

namespace LLM.HHData.Services;

/// <summary>
/// Operations for working with sitemap XML documents: fetching the main sitemap,
/// picking the employer sitemap links out of it, and collecting employer IDs from
/// one employer sitemap. Behaviour notes below describe the <c>SitemapService</c>
/// implementation in this file.
/// </summary>
public interface ISitemapService
{
    /// <summary>Fetches the main (root) sitemap and returns its content as a string.</summary>
    /// <param name="ct">Cancellation token passed on to the underlying request.</param>
    /// <returns>The sitemap content as text.</returns>
    Task<string> GetMainSitemapAsync(CancellationToken ct);

    /// <summary>Extracts the employer sitemap URLs from the main sitemap XML.</summary>
    /// <param name="mainXml">XML text of the main sitemap.</param>
    /// <returns>
    /// The values of <c>loc</c> elements that contain <c>/sitemap/employer</c> and do not
    /// end with <c>employers.xml</c>.
    /// </returns>
    IEnumerable<string> ExtractEmployerSitemapUrls(string mainXml);                         

    /// <summary>Downloads one employer sitemap and collects the employer IDs found in it.</summary>
    /// <param name="url">URL of the employer sitemap to download.</param>
    /// <param name="ct">Cancellation token passed on to the underlying request.</param>
    /// <returns>The employer IDs as strings, without duplicates.</returns>
    Task<IEnumerable<string>> GetEmployerIdsFromSitemapAsync(string url, CancellationToken ct); 
}

/// <summary>
/// Default <see cref="ISitemapService"/> implementation: downloads sitemap documents through
/// <see cref="IHttpSender"/> and parses their <c>&lt;loc&gt;</c> entries with a forward-only
/// <see cref="XmlReader"/>.
/// </summary>
public sealed class SitemapService : ISitemapService
{
    private readonly AppConfig _cfg;
    private readonly IHttpSender _http;

    // Matches an employer page URL on hh.ru and captures the numeric employer ID in group 1.
    private static readonly Regex EmployerUrlRx = new(
        @"^https?://hh\.ru/employer/(\d+)$",
        RegexOptions.Compiled);

    /// <summary>Creates the service from the application configuration and an HTTP sender.</summary>
    /// <param name="cfg">Options wrapper; its <c>Value</c> supplies the root sitemap URL.</param>
    /// <param name="http">HTTP abstraction used to download sitemap documents.</param>
    public SitemapService(IOptions<AppConfig> cfg, IHttpSender http)
    {
        _cfg = cfg.Value;
        _http = http;
    }

    /// <summary>
    /// Downloads the root sitemap configured in <c>BaseUrls.RootSitemap</c>.
    /// </summary>
    /// <param name="ct">Cancellation token forwarded to the HTTP sender.</param>
    /// <returns>The sitemap content as a string.</returns>
    public Task<string> GetMainSitemapAsync(CancellationToken ct)
        // "application/xml" is passed as the second argument (presumably the Accept media type).
        => _http.GetStringAsync(_cfg.BaseUrls.RootSitemap, "application/xml", ct);

    /// <summary>
    /// Extracts the links to the per-range employer sitemaps from the main sitemap XML.
    /// </summary>
    /// <remarks>
    /// The sequence is produced lazily; parsing (and any <see cref="XmlException"/>) happens
    /// while the result is being enumerated.
    /// </remarks>
    /// <param name="mainXml">XML text of the main sitemap.</param>
    /// <returns>
    /// Trimmed <c>&lt;loc&gt;</c> values that contain <c>/sitemap/employer</c> and do not end
    /// with <c>employers.xml</c>.
    /// </returns>
    public IEnumerable<string> ExtractEmployerSitemapUrls(string mainXml)
    {
        foreach (var raw in ReadLocValues(mainXml))
        {
            // Trim BEFORE filtering: otherwise a value such as ".../employers.xml\n" would
            // slip past the EndsWith exclusion and be returned after trimming.
            var candidate = raw.Trim();
            if (candidate.Length == 0)
            {
                continue;
            }

            // URLs are machine identifiers, so compare ordinally rather than by culture.
            if (candidate.Contains("/sitemap/employer", StringComparison.Ordinal)
                && !candidate.EndsWith("employers.xml", StringComparison.Ordinal))
            {
                yield return candidate;
            }
        }
    }

    /// <summary>
    /// Downloads one employer sitemap and collects the employer IDs it references.
    /// </summary>
    /// <param name="employerSitemapUrl">URL of the employer sitemap document.</param>
    /// <param name="ct">Cancellation token forwarded to the HTTP sender.</param>
    /// <returns>
    /// The distinct employer IDs (as strings) taken from every <c>&lt;loc&gt;</c> value that
    /// matches the employer page URL pattern; other entries are ignored.
    /// </returns>
    public async Task<IEnumerable<string>> GetEmployerIdsFromSitemapAsync(string employerSitemapUrl, CancellationToken ct)
    {
        var xml = await _http.GetStringAsync(employerSitemapUrl, "application/xml", ct);

        // HashSet removes IDs that appear more than once in the document.
        var ids = new HashSet<string>();
        foreach (var raw in ReadLocValues(xml))
        {
            var url = raw.Trim();
            if (url.Length == 0)
            {
                continue;
            }

            var match = EmployerUrlRx.Match(url);
            if (match.Success)
            {
                ids.Add(match.Groups[1].Value);
            }
        }

        return ids;
    }

    /// <summary>
    /// Streams the raw (untrimmed) text content of every <c>&lt;loc&gt;</c> element in the XML.
    /// </summary>
    /// <param name="xml">XML text to scan.</param>
    /// <returns>A lazily evaluated sequence of <c>&lt;loc&gt;</c> contents in document order.</returns>
    private static IEnumerable<string> ReadLocValues(string xml)
    {
        using var text = new StringReader(xml);
        using var reader = XmlReader.Create(
            text,
            new XmlReaderSettings { IgnoreComments = true, IgnoreProcessingInstructions = true });

        while (!reader.EOF)
        {
            // Name is the qualified name, so only unprefixed <loc> elements are matched.
            if (reader.NodeType == XmlNodeType.Element && reader.Name == "loc")
            {
                // ReadElementContentAsString leaves the reader on the node AFTER </loc>.
                // Do not call Read() here, or a directly adjacent <loc> would be skipped.
                yield return reader.ReadElementContentAsString();
            }
            else
            {
                reader.Read();
            }
        }
    }
}