【JavaScript】キーワード検索してURLリストをCSV出力 – bizlabo.site

使用方法

baseURL は仮に https://example.com/ にしてあるから、実プロジェクトでは自分のURLに直してね。
searchPatterns / replacementTexts の組は自由に増減OK。
CSVの1行目に「修正前のテキスト」、2行目に「修正後のテキスト」の見出し行が並び、3行目は空行、4行目以降にURL別のマッチ結果（一致した列に「○」）が出るよ。

const fs = require("fs");
const path = require("path");

// Command-line handling: passing --use-replacement (or its short form -r)
// switches the search to the replacement phrases.
const args = process.argv.slice(2);
const replacementFlags = ["--use-replacement", "-r"];
const useReplacementForSearch = args.some((arg) =>
  replacementFlags.includes(arg)
);

// Phrase table. Each row is [original phrase, replacement phrase]:
// the original is a typical call-to-action / marketing sentence found on web
// pages, the replacement is its reworded (more natural or more polite) form.
const phrasePairs = [
  ["今すぐお問い合わせください。", "お問い合わせはいつでも可能です。"],
  ["無料でご相談いただけます。", "無料相談を承っております。"],
  ["詳細はこちらをご覧ください。", "詳しい情報はリンク先をご確認ください。"],
  ["お急ぎの方はお電話でご連絡ください。", "お急ぎの際はお電話でも承ります。"],
  ["初回相談は無料です。", "初回のご相談は費用がかかりません。"],
  ["お気軽にご相談ください。", "どうぞお気軽にご連絡ください。"],
  ["専門スタッフが丁寧に対応いたします。", "スタッフが丁寧にご案内いたします。"],
  ["まずは無料で資料請求", "無料の資料をご利用いただけます。"],
  ["お問い合わせフォームはこちら", "お問い合わせはこちらのフォームからどうぞ。"],
  ["今すぐ資料をダウンロード", "資料を今すぐダウンロードできます。"],
];

// Original phrases (first column of the table).
const searchPatterns = phrasePairs.map(([original]) => original);

// Replacement phrases (second column of the table), index-aligned with
// searchPatterns.
const replacementTexts = phrasePairs.map(([, replacement]) => replacement);

// Phrases actually searched for in this run: originals by default,
// replacements when the command-line flag is present.
const patternsToSearch = !useReplacementForSearch
  ? searchPatterns
  : replacementTexts;

// Directory names to leave out of the search (folder names common in web
// projects). Each name is compiled to an unanchored RegExp, so it matches as
// a substring of whatever string it is tested against.
const excludeDirs = Array.from(
  [
    "assets",
    "images",
    "scripts",
    "styles",
    "node_modules",
    "vendor",
    "backup",
    "test",
    "archive",
    "partials",
    "includes",
  ],
  (name) => new RegExp(name)
);

// File-name fragments to leave out (shared parts, templates and the like),
// compiled to unanchored RegExps in the same way.
const excludeFiles = Array.from(
  ["template", "header", "footer", "test"],
  (name) => new RegExp(name)
);

// Project root that is scanned (the current working directory).
const folderRoot = process.cwd();

// Prefix prepended to each relative file path to form the URL that is output.
const baseURL = "https://example.com/";

// Accumulated search results, keyed by URL.
const urlResults = {};

/**
 * Decide whether a file must be skipped.
 *
 * A file is skipped when any `excludeDirs` pattern matches its directory path
 * or any `excludeFiles` pattern matches its file name.
 *
 * @param {string} dirPath - Directory part of the file's path.
 * @param {string} fileName - Base name of the file.
 * @returns {boolean} true when the file is excluded from the search.
 */
function shouldExclude(dirPath, fileName) {
  for (const dirPattern of excludeDirs) {
    if (dirPattern.test(dirPath)) return true;
  }

  for (const filePattern of excludeFiles) {
    if (filePattern.test(fileName)) return true;
  }

  return false;
}

/**
 * Recursively scan `dir` for .html / .php files and record which of the
 * phrases in `patternsToSearch` each file contains.
 *
 * For every non-excluded file containing at least one phrase, an entry is
 * stored in the shared `urlResults` object: the key is `baseURL` followed by
 * the file's path relative to `folderRoot` (backslashes turned into "/"), the
 * value is an object mapping each matched phrase to "○".
 *
 * Directories whose relative path matches an `excludeDirs` pattern are not
 * entered at all. Every file below such a directory would be rejected by the
 * same substring patterns anyway, so skipping the subtree avoids reading
 * large trees such as node_modules for nothing.
 *
 * @param {string} dir - Directory to scan (normally inside `folderRoot`).
 * @returns {void}
 * @throws Propagates any error raised by fs.readdirSync / fs.readFileSync.
 */
function walkDirectory(dir) {
  const entries = fs.readdirSync(dir, { withFileTypes: true });

  for (const entry of entries) {
    const fullPath = path.join(dir, entry.name);
    const relativePath = path.relative(folderRoot, fullPath);

    if (entry.isDirectory()) {
      // Prune excluded directories instead of walking them file by file.
      if (excludeDirs.some((rx) => rx.test(relativePath))) continue;
      walkDirectory(fullPath);
      continue;
    }

    // Only HTML and PHP sources are searched.
    if (!/\.(html|php)$/.test(entry.name)) continue;

    const dirPart = path.dirname(relativePath);
    const fileName = path.basename(relativePath);
    if (shouldExclude(dirPart, fileName)) continue;

    const content = fs.readFileSync(fullPath, "utf8");
    // Normalise Windows separators so the URL always uses "/".
    const url = baseURL + relativePath.replace(/\\/g, "/");

    const matchResults = {};
    let hasMatch = false;

    for (const phrase of patternsToSearch) {
      if (content.includes(phrase)) {
        // Results are keyed by the phrase that was searched for.
        matchResults[phrase] = "○";
        hasMatch = true;
      }
    }

    // Files without any hit are left out of the report.
    if (hasMatch) urlResults[url] = matchResults;
  }
}

// Run the search, starting at the project root.
walkDirectory(folderRoot);

// Build the output file name from the search mode and today's date (YYYYMMDD).
// The date is assembled from local-time components: toISOString() reports the
// UTC date, which differs from the local calendar date for part of the day in
// any time zone that is not UTC (e.g. before 09:00 in Japan).
const now = new Date();
const dateStr = [
  String(now.getFullYear()).padStart(4, "0"),
  String(now.getMonth() + 1).padStart(2, "0"), // getMonth() is zero-based
  String(now.getDate()).padStart(2, "0"),
].join("");
const searchType = useReplacementForSearch ? "replacement" : "original";
const csvPath = `search_matrix_${searchType}_${dateStr}.csv`;

/**
 * Format one value as a CSV field.
 *
 * A value containing a comma, a double quote or a line break is wrapped in
 * double quotes with embedded quotes doubled (RFC 4180); any other value is
 * returned unchanged, so plain phrases and URLs look exactly as before.
 *
 * @param {*} value - Value to write; converted with String().
 * @returns {string} The CSV-safe field text.
 */
function toCsvField(value) {
  const text = String(value);
  if (!/[",\r\n]/.test(text)) return text;
  return `"${text.replace(/"/g, '""')}"`;
}

/**
 * Join an array of values into one CSV line, escaping every field.
 *
 * @param {Array<*>} fields - Values of the row, in column order.
 * @returns {string} The row as a single comma-separated line.
 */
function toCsvLine(fields) {
  return fields.map(toCsvField).join(",");
}

// Two heading rows: the original phrases and, below them, the replacement
// phrases. Column i of both rows belongs to the same phrase pair.
const header = ["修正前のテキスト", ...searchPatterns];
const header2 = ["修正後のテキスト", ...replacementTexts];
const emptyLine = Array(header.length).fill("").join(",");

// Rows that open the file: both headings followed by a blank separator row.
const lines = [toCsvLine(header), toCsvLine(header2), emptyLine];

// One row per URL: the URL, then "○" in the column of every phrase found
// (empty otherwise). Columns follow the phrase list that was searched.
for (const [url, matchResults] of Object.entries(urlResults)) {
  const marks = patternsToSearch.map((pattern) => matchResults[pattern] || "");
  lines.push(toCsvLine([url, ...marks]));
}

// Write the CSV as UTF-8 with a leading byte-order mark (EF BB BF).
const bom = Buffer.from([0xef, 0xbb, 0xbf]);
const csvText = lines.join("\n");
const csvBody = Buffer.from(csvText, "utf8");
const csvBuffer = Buffer.concat([bom, csvBody]);

fs.writeFileSync(csvPath, csvBuffer);

// Report the generated file and which phrase set was searched.
const modeLabel = useReplacementForSearch ? "修正後" : "修正前";
console.log(`CSV生成完了: ${csvPath}（検索モード: ${modeLabel}）`);