# NOTE(review): the physical line below holds many logical statements that appear to have
# lost their line breaks. Because it starts with `#`, Ruby reads the entire line as a
# single comment, so nothing on it is executed as written. Restore the line breaks before
# relying on any of it.
#
# What the line contains, in order:
#   * the `frozen_string_literal: true` magic comment;
#   * URL                 - address of a German WCAG 2.1 translation, judging by its
#                           "translations/WCAG21-de" path;
#   * WCAG_22_EN_URL      - address of the WCAG 2.2 text on w3.org; import_wcag22en opens
#                           it with URI.open and parses it with Nokogiri::HTML5;
#   * WCAG_22_EN_QREF_URL - address of the WCAG 2.2 quick reference; import_wcag22en parses
#                           it the same way and uses it as the prefix of each `perm_url`;
#   * translate(input)    - returns DeepL.translate(input, "EN", "DE"); presumably an
#                           English -> German translation, confirm against the DeepL
#                           client in use;
#   * the opening part of import_wcag22en, which continues on the following lines and is
#     therefore not documented here.
#
# NOTE(review), import_wcag22en: `principle_title` is the raw result of String#scan with a
# capture group (an array of arrays) and is passed unchanged as `name_en:` to
# Principle.find_or_create_by!. The quick-reference `each_with_object` block also ends in
# `rescue debugger`, i.e. it calls `debugger` whenever the block body raises. Both look
# like leftovers rather than intended behaviour - confirm before running an import.
# frozen_string_literal: true URL = "https://outline-rocks.github.io/wcag/translations/WCAG21-de/" WCAG_22_EN_URL ="https://www.w3.org/TR/WCAG22/" WCAG_22_EN_QREF_URL = "https://www.w3.org/WAI/WCAG22/quickref/" def translate(input) DeepL.translate(input, "EN", "DE") end def import_wcag22en doc = Nokogiri::HTML5(URI.open(WCAG_22_EN_URL)) qrefdoc = Nokogiri::HTML5(URI.open(WCAG_22_EN_QREF_URL)) quick_criteria = qrefdoc.css(".guidelines section:has(h4)").each_with_object({}) do |node, h| h[node.css("h4 strong").first.content] = { quick_criterion_en: node.css(".sc-content .sc-text p").first.content, link_url: node.css(".understanding a").first.attribute("href").value, perm_url: "#{WCAG_22_EN_QREF_URL}##{node.css("article").first.attribute("id").value}" } rescue debugger end link_category = LinkCategory.find_by(name: "Verstehen") qr_category = LinkCategory.find_by(name: "WCAG Quick Reference") # debugger # raise ActiveRecord::Rollback standards = [ Standard.find_by(name_de: "WCAG 2.2"), Standard.find_by(name_de: "EN 301 549") ] doc.css("section.principle").each do |principle_node| _principle_id = principle_node.attributes["id"].value principle_title = principle_node.css("h2").first.content.scan(/([a-zA-Z]+)/) principle = Principle.find_or_create_by!(name_en: principle_title) principle_node.css("section.guideline").each do |guideline_node| next unless guideline_node.css("h3").first puts guideline_node.css("h3").first&.content puts guideline_node.css_path g_title = guideline_node.css("h3") .first .content .scan(/Guideline \d+\.\d+ (.*)/) .first .first g_number = guideline_node.css("h3 bdi") .first .content .scan(/\d+\.(\d+)/) .first .first g_text = guideline_node.css("> p").first.content guideline = Guideline.find_or_create_by(principle: principle, number: g_number, name_en: g_title) guideline.update(description_en: g_text, description_de: translate(g_text), name_de: translate(g_title)) if guideline.description_de.blank? 
guideline_node.css("section.guideline").each do |sc| puts sc.css_path puts sc.css("h4 bdi") sc_number, sc_title = sc.css("h4") .first .content .scan(/Success Criterion \d+\.\d+\.(\d+) (.*)/) .first sc_level = sc.css("p.conformance-level").first&.content&.scan(/\(Level (A+)\)/)&.first&.last sc_url = sc.css("a.self-link").first.attr("href") sc_conformity_notice = sc.css("div.note p").to_a.map(&:to_s).join("\n") full_text = sc.children .select { _1.class == Nokogiri::XML::Element } .select { _1.attr("class").nil? || _1.attr("class") == "note" } .reduce("") { |str, node| str + node.to_s } check = Check.find_or_initialize_by(guideline_id: guideline.id, number: sc_number) new_standards = Set.new(check.standards) new_standards += standards check.name_de = translate sc_title if check.name_de.blank? check.name_en = sc_title check.standards = new_standards check.applicable_to_app = check.applicable_to_web = true check.external_url = "#{WCAG_22_EN_URL}#{sc_url}" check.conformity_level = sc_level&.to_sym check.conformity_notice_de = translate sc_conformity_notice check.criterion_de = "
#{sc.css('dl').first&.to_s}
" if sc.css("dl").first # _sc_number = sc_title.scan(/Erfolgskriterium \d+\.\d+\.(\d+).*/).first.last check = Check.find_or_initialize_by(external_number: sc_number) check.name_de = sc_title check.principle = principle check.standards << standard unless check.standards.include?(standard) check.applicable_to_app = check.applicable_to_web = true check.external_number = sc_number check.external_url = "#{URL}#{sc_url}" check.conformity_level = sc_level.to_sym check.conformity_notice_de = sc_conformity_notice check.criterion_de = "#{text}