Skip to main content

How to Get All Links on an HTML Page with JavaScript

Created: February 23, 2022 4 min read

Introduction

Extracting all links from a page is a common task — for link auditing, SEO analysis, building sitemaps, or scraping. JavaScript gives you several ways to do this, from simple DOM queries to more advanced filtering and deduplication. See the JavaScript Guide for more context.

Basic: Get All Anchor Tags

The simplest approach uses querySelectorAll('a') to grab every <a> element:

// Log the URL and trimmed visible text of every anchor on the page.
for (const anchor of document.querySelectorAll('a')) {
  console.log(anchor.href, anchor.textContent.trim());
}
/**
 * Collect every anchor on the page as a plain object.
 * Empty `title`/`target` attributes are normalized to null.
 * @returns {Array<{text: string, href: string, title: string|null, target: string|null}>}
 */
function getAllLinks() {
  const anchors = Array.from(document.querySelectorAll('a'));

  return anchors.map((anchor) => {
    const { href } = anchor;
    return {
      text: anchor.textContent.trim(),
      href,
      title: anchor.title || null,
      target: anchor.target || null,
    };
  });
}

// Dump the collected links as pretty-printed JSON.
const allLinks = getAllLinks();
console.log(JSON.stringify(allLinks, null, 2));

Wait for DOM Ready

If your script runs before the page finishes loading, wrap it in a DOMContentLoaded listener:

const sites = [];

// Collect link names and URLs once the DOM is fully parsed.
document.addEventListener('DOMContentLoaded', () => {
  const links = document.querySelectorAll('a');

  links.forEach(link => {
    sites.push({
      // textContent instead of outerText: outerText forces a layout reflow,
      // was historically unsupported in Firefox, and reflects CSS
      // text-transform — textContent is consistent with the other snippets.
      name: link.textContent.trim(),
      url: link.href
    });
  });

  console.log(JSON.stringify(sites, null, 2));
});
/**
 * Collect all links whose hostname differs from the current page's.
 * Anchors whose href cannot be parsed as a URL are skipped.
 * @returns {Array<{text: string, href: string, host: string}>}
 */
function getExternalLinks() {
  const currentHost = window.location.hostname;

  return Array.from(document.querySelectorAll('a[href]'))
    .map(link => {
      // Parse each href exactly once (the original parsed twice:
      // once in the filter and again in the map).
      try {
        return { link, url: new URL(link.href) };
      } catch {
        return null; // invalid URL
      }
    })
    .filter(entry => entry !== null && entry.url.hostname !== currentHost)
    .map(({ link, url }) => ({
      text: link.textContent.trim(),
      href: link.href,
      host: url.hostname
    }));
}

// Print every off-site link found on the page.
const externalLinks = getExternalLinks();
console.log(externalLinks);
/**
 * Return the hrefs of all links pointing at the current host.
 * Anchors whose href is not a valid URL are ignored.
 * @returns {string[]}
 */
function getInternalLinks() {
  const { hostname } = window.location;
  const internal = [];

  for (const link of document.querySelectorAll('a[href]')) {
    try {
      if (new URL(link.href).hostname === hostname) {
        internal.push(link.href);
      }
    } catch {
      // unparseable href — skip it
    }
  }

  return internal;
}
// Links that open in a new tab
const newTabLinks = document.querySelectorAll('a[target="_blank"]');

// Links inside the site navigation (descendant selector, not a class)
const navLinks = document.querySelectorAll('nav a');

// Links pointing to PDFs
const pdfLinks = Array.from(document.querySelectorAll('a[href]'))
  .filter(link => link.href.endsWith('.pdf'));

// Anchors without an href attribute (e.g. placeholders or named anchors)
const anchorLinks = document.querySelectorAll('a:not([href])');
/**
 * Return every link on the page, deduplicated by absolute URL.
 * The first occurrence of each URL wins; document order is preserved.
 * @returns {Array<{text: string, href: string}>}
 */
function getUniqueLinks() {
  const byHref = new Map();

  for (const link of document.querySelectorAll('a[href]')) {
    if (!byHref.has(link.href)) {
      byHref.set(link.href, {
        text: link.textContent.trim(),
        href: link.href
      });
    }
  }

  return [...byHref.values()];
}

Run in Browser Console (Bookmarklet)

You can paste this directly into the browser console to extract links from any page:

// Paste in browser console
// Paste in browser console
(() => {
  // Keyed by href so duplicates collapse; the first insertion fixes the
  // position, later duplicates overwrite the value (same as the Map
  // constructor over [href, link] pairs).
  const byHref = new Map();

  for (const a of document.querySelectorAll('a[href]')) {
    const entry = { text: a.textContent.trim(), href: a.href };
    if (entry.href.startsWith('http')) {
      byHref.set(entry.href, entry);
    }
  }

  const unique = [...byHref.values()];

  console.table(unique);
  copy(JSON.stringify(unique, null, 2)); // copies to clipboard
  console.log(`${unique.length} unique links copied to clipboard`);
})();

When working server-side (e.g., scraping), use a parser like cheerio:

npm install cheerio
import * as cheerio from 'cheerio';
import fetch from 'node-fetch';

/**
 * Fetch a page and return its links, with relative hrefs resolved
 * against the page URL.
 * @param {string} url - Page to fetch; also the base for relative hrefs.
 * @returns {Promise<Array<{text: string, href: string}>>}
 */
async function extractLinks(url) {
  const response = await fetch(url);
  const $ = cheerio.load(await response.text());

  const links = [];

  $('a[href]').each((_, el) => {
    const anchor = $(el);
    const text = anchor.text().trim();

    // Resolve relative URLs; anchors with invalid hrefs are skipped.
    try {
      const absolute = new URL(anchor.attr('href'), url).href;
      links.push({ text, href: absolute });
    } catch {
      // skip invalid URLs
    }
  });

  return links;
}

const links = await extractLinks('https://example.com');
console.log(links);
/**
 * Probe each link with a HEAD request and report its HTTP status.
 * All requests run in parallel; a failed request never rejects the
 * whole batch thanks to Promise.allSettled.
 * @param {Array<{href: string}>} links - Links to check.
 * @returns {Promise<Array<{href: string, status: number, ok: boolean, error?: string}>>}
 */
async function checkLinks(links) {
  const results = await Promise.allSettled(
    links.map(async link => {
      const response = await fetch(link.href, { method: 'HEAD' });
      return {
        href: link.href,
        status: response.status,
        ok: response.ok
      };
    })
  );

  // allSettled preserves input order, so results[i] corresponds to
  // links[i] — a rejected probe keeps its real href instead of the
  // useless 'unknown' placeholder.
  return results.map((r, i) => r.status === 'fulfilled' ? r.value : {
    href: links[i].href,
    status: 0,
    ok: false,
    error: r.reason?.message
  });
}

// Check every unique link on the page and report the broken ones.
const uniqueLinks = getUniqueLinks();
const checkResults = await checkLinks(uniqueLinks);
const brokenLinks = checkResults.filter(r => !r.ok);
console.log('Broken links:', brokenLinks);
/**
 * Download every link on the page as a two-column CSV (Text, URL).
 * Fields are quoted and embedded quote characters are doubled per
 * RFC 4180, so commas AND quotes in link text survive intact — the
 * previous version stripped commas (lossy) and let a quote in the
 * text corrupt the row.
 */
function exportLinksAsCSV() {
  // Quote a field, doubling any embedded double quotes (RFC 4180 §2.7).
  const quote = value => `"${String(value).replace(/"/g, '""')}"`;

  const rows = Array.from(document.querySelectorAll('a[href]'))
    .map(a => [a.textContent.trim(), a.href]);

  const csv = [
    'Text,URL',
    ...rows.map(row => row.map(quote).join(','))
  ].join('\n');

  const blob = new Blob([csv], { type: 'text/csv' });
  const url = URL.createObjectURL(blob);

  // Trigger a download via a temporary anchor element.
  const a = document.createElement('a');
  a.href = url;
  a.download = 'links.csv';
  a.click();

  // Release the object URL once the download has been triggered.
  URL.revokeObjectURL(url);
}

exportLinksAsCSV();

Summary

Method Use Case
querySelectorAll('a') Get all anchor elements
querySelectorAll('a[href]') Only links with an href
Filter by hostname Separate internal vs external
Set deduplication Remove duplicate URLs
cheerio (Node.js) Parse HTML server-side
fetch HEAD Check if links are alive

Resources

Comments

Share this article

Scan to read on mobile