Skip to content

Commit

Permalink
Manually resolve eslint problems
Browse files Browse the repository at this point in the history
  • Loading branch information
janbuchar committed Mar 1, 2024
1 parent 123c732 commit 75db005
Show file tree
Hide file tree
Showing 20 changed files with 50 additions and 64 deletions.
7 changes: 2 additions & 5 deletions packages/basic-crawler/src/internals/basic-crawler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -684,7 +684,6 @@ export class BasicCrawler<Context extends CrawlingContext = BasicCrawlingContext
if (this.retryOnBlocked) {
this.sessionPoolOptions.blockedStatusCodes = sessionPoolOptions.blockedStatusCodes ?? [];
if (this.sessionPoolOptions.blockedStatusCodes.length !== 0) {
// eslint-disable-next-line max-len
log.warning(
`Both 'blockedStatusCodes' and 'retryOnBlocked' are set. Please note that the 'retryOnBlocked' feature might not work as expected.`,
);
Expand Down Expand Up @@ -825,7 +824,6 @@ export class BasicCrawler<Context extends CrawlingContext = BasicCrawlingContext
let message: string;

if (operationMode === 'ERROR') {
// eslint-disable-next-line max-len
message = `Experiencing problems, ${
this.stats.state.requestsFailed - previousState.requestsFailed || this.stats.state.requestsFailed
} failed requests in the past ${this.statusMessageLoggingInterval} seconds.`;
Expand Down Expand Up @@ -952,7 +950,6 @@ export class BasicCrawler<Context extends CrawlingContext = BasicCrawlingContext
}

periodicLogger.stop();
// eslint-disable-next-line max-len
await this.setStatusMessage(
`Finished! Total ${this.stats.state.requestsFinished + this.stats.state.requestsFailed} requests: ${
this.stats.state.requestsFinished
Expand All @@ -966,8 +963,8 @@ export class BasicCrawler<Context extends CrawlingContext = BasicCrawlingContext

async getRequestQueue() {
if (!this.requestQueue && this.requestList) {
// eslint-disable-next-line max-len
this.log.warningOnce(
// eslint-disable-next-line max-len
'When using RequestList and RequestQueue at the same time, you should instantiate both explicitly and provide them in the crawler options, to ensure correctly handled restarts of the crawler.',
);
}
Expand Down Expand Up @@ -1540,8 +1537,8 @@ export class BasicCrawler<Context extends CrawlingContext = BasicCrawlingContext
protected _augmentContextWithDeprecatedError(context: Context, error: Error) {
Object.defineProperty(context, 'error', {
get: () => {
// eslint-disable-next-line max-len
this.log.deprecated(
// eslint-disable-next-line max-len
"The 'error' property of the crawling context is deprecated, and it is now passed as the second parameter in 'errorHandler' and 'failedRequestHandler'. Please update your code, as this property will be removed in a future version.",
);

Expand Down
4 changes: 2 additions & 2 deletions packages/browser-crawler/src/internals/browser-crawler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -457,8 +457,8 @@ export abstract class BrowserCrawler<
protected override async isRequestBlocked(crawlingContext: Context): Promise<string | false> {
const { page, response } = crawlingContext;

// eslint-disable-next-line dot-notation
const blockedStatusCodes =
// eslint-disable-next-line dot-notation
(this.sessionPool?.['blockedStatusCodes'].length ?? 0) > 0
? // eslint-disable-next-line dot-notation
this.sessionPool!['blockedStatusCodes']
Expand Down Expand Up @@ -803,8 +803,8 @@ export async function browserCrawlerEnqueueLinks({
* Extracts URLs from a given page.
* @ignore
*/
// eslint-disable-next-line @typescript-eslint/ban-types
export async function extractUrlsFromPage(
// eslint-disable-next-line @typescript-eslint/ban-types
page: { $$eval: Function },
selector: string,
baseUrl: string,
Expand Down
2 changes: 1 addition & 1 deletion packages/browser-pool/src/browser-pool.ts
Original file line number Diff line number Diff line change
Expand Up @@ -369,8 +369,8 @@ export class BrowserPool<
const firstPluginName = firstPluginConstructor.name;
const providedPluginName = (providedPlugin as BrowserPlugin).constructor.name;

// eslint-disable-next-line max-len
throw new Error(
// eslint-disable-next-line max-len
`Browser plugin at index ${i} (${providedPluginName}) is not an instance of the same plugin as the first plugin provided (${firstPluginName}).`,
);
}
Expand Down
7 changes: 5 additions & 2 deletions packages/core/src/crawlers/crawler_commons.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ import type { Session } from '../session_pool/session';
import type { RequestQueueOperationOptions, Dataset, RecordOptions } from '../storages';
import { KeyValueStore } from '../storages';

// we need `Record<string & {}, unknown>` here, otherwise `Omit<Context>` is resolved badly
// eslint-disable-next-line
export interface RestrictedCrawlingContext<UserData extends Dictionary = Dictionary>
// we need `Record<string & {}, unknown>` here, otherwise `Omit<Context>` is resolved badly
// eslint-disable-next-line
extends Record<string & {}, unknown> {
/**
* The original {@apilink Request} object.
Expand Down Expand Up @@ -160,8 +160,11 @@ export interface CrawlingContext<Crawler = unknown, UserData extends Dictionary
export class RequestHandlerResult {
private _keyValueStoreChanges: Record<string, Record<string, { changedValue: unknown; options?: RecordOptions }>> =
{};

private pushDataCalls: Parameters<RestrictedCrawlingContext['pushData']>[] = [];

private addRequestsCalls: Parameters<RestrictedCrawlingContext['addRequests']>[] = [];

private enqueueLinksCalls: Parameters<RestrictedCrawlingContext['enqueueLinks']>[] = [];

constructor(
Expand Down
15 changes: 3 additions & 12 deletions packages/core/src/enqueue_links/shared.ts
Original file line number Diff line number Diff line change
Expand Up @@ -171,10 +171,7 @@ export function createRequests(
.filter(({ url }) => {
return !excludePatternObjects.some((excludePatternObject) => {
const { regexp, glob } = excludePatternObject;
return (
(regexp && url.match(regexp)) || // eslint-disable-line
(glob && minimatch(url, glob, { nocase: true }))
);
return (regexp && url.match(regexp)) || (glob && minimatch(url, glob, { nocase: true }));
});
})
.map(({ url, opts }) => {
Expand All @@ -184,10 +181,7 @@ export function createRequests(

for (const urlPatternObject of urlPatternObjects) {
const { regexp, glob, ...requestRegExpOptions } = urlPatternObject;
if (
(regexp && url.match(regexp)) || // eslint-disable-line
(glob && minimatch(url, glob, { nocase: true }))
) {
if ((regexp && url.match(regexp)) || (glob && minimatch(url, glob, { nocase: true }))) {
const request =
typeof opts === 'string'
? { url: opts, ...requestRegExpOptions, enqueueStrategy: strategy }
Expand All @@ -214,10 +208,7 @@ export function filterRequestsByPatterns(requests: Request[], patterns?: UrlPatt
for (const urlPatternObject of patterns) {
const { regexp, glob } = urlPatternObject;

if (
(regexp && request.url.match(regexp)) || // eslint-disable-line
(glob && minimatch(request.url, glob, { nocase: true }))
) {
if ((regexp && request.url.match(regexp)) || (glob && minimatch(request.url, glob, { nocase: true }))) {
filtered.push(request);
// Break the pattern loop, as we already matched this request once
break;
Expand Down
2 changes: 1 addition & 1 deletion packages/core/src/session_pool/session_pool.ts
Original file line number Diff line number Diff line change
Expand Up @@ -240,8 +240,8 @@ export class SessionPool extends EventEmitter {
}

if (!this.persistStateKeyValueStoreId) {
// eslint-disable-next-line max-len
this.log.debug(
// eslint-disable-next-line max-len
`No 'persistStateKeyValueStoreId' options specified, this session pool's data has been saved in the KeyValueStore with the id: ${this.keyValueStore.id}`,
);
}
Expand Down
1 change: 1 addition & 0 deletions packages/core/src/storages/request_provider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -741,6 +741,7 @@ declare class BuiltRequestProvider extends RequestProvider {
override fetchNextRequest<T extends Dictionary = Dictionary>(
options?: RequestOptions<Dictionary> | undefined,
): Promise<Request<T> | null>;

protected override ensureHeadIsNonEmpty(): Promise<void>;
}

Expand Down
3 changes: 1 addition & 2 deletions packages/http-crawler/src/internals/http-crawler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,14 @@ import { RETRY_CSS_SELECTORS, gotScraping } from '@crawlee/utils';
import * as cheerio from 'cheerio';
import type { RequestLike, ResponseLike } from 'content-type';
import contentTypeParser from 'content-type';
// @ts-expect-error This throws a compilation error due to got-scraping being ESM only but we only import types, so its alllll gooooood
import type {
OptionsInit,
Method,
Request as GotRequest,
Options,
PlainResponse,
TimeoutError as TimeoutErrorClass,
// @ts-expect-error This throws a compilation error due to got-scraping being ESM only but we only import types, so its alllll gooooood
} from 'got-scraping';
import iconv from 'iconv-lite';
import mime from 'mime-types';
Expand Down Expand Up @@ -599,7 +599,6 @@ export class HttpCrawler<
if (gotOptions.headers?.Cookie && gotOptions.headers?.cookie) {
const { Cookie: upperCaseHeader, cookie: lowerCaseHeader } = gotOptions.headers;

// eslint-disable-next-line max-len
this.log.warning(
`Encountered mixed casing for the cookie headers in the got options for request ${request.url} (${request.id}). Their values will be merged`,
);
Expand Down
24 changes: 14 additions & 10 deletions packages/memory-storage/test/fs-fallback.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -69,16 +69,20 @@ describe('fallback to fs for reading', () => {
});
});

test('attempting to read "other" key value store with no "__metadata__" present should read from fs, even if accessed without generating id first', async () => {
const otherStore = storage.keyValueStore('other');

const input = await otherStore.getRecord('INPUT');
expect(input).toStrictEqual<KeyValueStoreRecord>({
key: 'INPUT',
value: { foo: 'bar but from fs' },
contentType: 'application/json; charset=utf-8',
});
});
test(
'attempting to read "other" key value store with no "__metadata__" present should read from fs, ' +
'even if accessed without generating id first',
async () => {
const otherStore = storage.keyValueStore('other');

const input = await otherStore.getRecord('INPUT');
expect(input).toStrictEqual<KeyValueStoreRecord>({
key: 'INPUT',
value: { foo: 'bar but from fs' },
contentType: 'application/json; charset=utf-8',
});
},
);

test('attempting to read non-existent "default_2" key value store should return undefined', async () => {
await expect(storage.keyValueStore('default_2').get()).resolves.toBeUndefined();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -806,7 +806,6 @@ export function registerUtilsToContext(context: PlaywrightCrawlingContext): void
context.infiniteScroll = async (options?: InfiniteScrollOptions) => infiniteScroll(context.page, options);
context.saveSnapshot = async (options?: SaveSnapshotOptions) =>
saveSnapshot(context.page, { ...options, config: context.crawler.config });
// eslint-disable-next-line max-len
context.enqueueLinksByClickingElements = async (
options: Omit<EnqueueLinksByClickingElementsOptions, 'page' | 'requestQueue'>,
) =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,6 @@ export async function sendCDPCommand<T extends keyof ProtocolMapping.Commands>(
const jsonPath = require.resolve('puppeteer/package.json');
const parsed = JSON.parse(await readFile(jsonPath, 'utf-8'));

// eslint-disable-next-line max-len
throw new Error(
`Cannot detect CDP client for Puppeteer ${parsed.version}. You should report this to Crawlee, mentioning the puppeteer version you are using.`,
);
Expand Down Expand Up @@ -1007,7 +1006,6 @@ export function registerUtilsToContext(context: PuppeteerCrawlingContext): void
await injectJQuery(context.page, { surviveNavigations: false });
};
context.parseWithCheerio = async () => parseWithCheerio(context.page);
// eslint-disable-next-line max-len
context.enqueueLinksByClickingElements = async (
options: Omit<EnqueueLinksByClickingElementsOptions, 'page' | 'requestQueue'>,
) =>
Expand Down
6 changes: 4 additions & 2 deletions packages/utils/src/internals/general.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,21 @@ import { setTimeout } from 'node:timers/promises';
* Default regular expression to match URLs in a string that may be plain text, JSON, CSV or other. It supports common URL characters
* and does not support URLs containing commas or spaces. The URLs also may contain Unicode letters (not symbols).
*/
// eslint-disable-next-line
export const URL_NO_COMMAS_REGEX = RegExp(
'https?://(www\\.)?[\\p{L}0-9][-\\p{L}0-9@:%._\\+~#=]{0,254}[\\p{L}0-9]\\.[a-z]{2,63}(:\\d{1,5})?(/[-\\p{L}0-9@:%_\\+.~#?&//=\\(\\)]*)?',
'giu',
); // eslint-disable-line
);

/**
* Regular expression that, in addition to the default regular expression `URL_NO_COMMAS_REGEX`, supports matching commas in URL path and query.
* Note, however, that this may prevent parsing URLs from comma delimited lists, or the URLs may become malformed.
*/
// eslint-disable-next-line
export const URL_WITH_COMMAS_REGEX = RegExp(
'https?://(www\\.)?[\\p{L}0-9][-\\p{L}0-9@:%._\\+~#=]{0,254}[\\p{L}0-9]\\.[a-z]{2,63}(:\\d{1,5})?(/[-\\p{L}0-9@:%_\\+,.~#?&//=\\(\\)]*)?',
'giu',
); // eslint-disable-line
);

let isDockerPromiseCache: Promise<boolean> | undefined;

Expand Down
14 changes: 6 additions & 8 deletions packages/utils/src/internals/social.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ import cheerio from 'cheerio';
import { htmlToText } from './cheerio';

// Regex inspired by https://zapier.com/blog/extract-links-email-phone-regex/
// eslint-disable-next-line max-len
const EMAIL_REGEX_STRING =
// eslint-disable-next-line max-len
'(?:[a-z0-9!#$%&\'*+/=?^_`{|}~-]+(?:\\.[a-z0-9!#$%&\'*+/=?^_`{|}~-]+)*|"(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|\\\\[\x01-\x09\x0b\x0c\x0e-\x7f])*")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\\[(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?|[a-z0-9-]*[a-z0-9]:(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]|\\\\[\x01-\x09\x0b\x0c\x0e-\x7f])+)\\])';

/**
Expand Down Expand Up @@ -167,11 +167,9 @@ export function phonesFromUrls(urls: string[]): string[] {
// - They use a negative lookbehind and lookahead assertions, which are only supported in Node 8+.
// They are used to prevent matching URLs in strings like "blahttps://www.example.com"

// eslint-disable-next-line max-len
const LINKEDIN_REGEX_STRING =
'(?<!\\w)(?:(?:http(?:s)?:\\/\\/)?(?:(?:(?:[a-z]+\\.)?linkedin\\.com\\/(?:in|company)\\/)([a-z0-9\\-_%=]{2,60})(?![a-z0-9\\-_%=])))(?:\\/)?';

// eslint-disable-next-line max-len
const INSTAGRAM_REGEX_STRING =
'(?<!\\w)(?:http(?:s)?:\\/\\/)?(?:(?:www\\.)?(?:instagram\\.com|instagr\\.am)\\/)(?!explore|_n|_u)([a-z0-9_.]{2,30})(?![a-z0-9_.])(?:/)?';

Expand All @@ -180,26 +178,26 @@ const TWITTER_RESERVED_PATHS =
// eslint-disable-next-line max-len
const TWITTER_REGEX_STRING = `(?<!\\w)(?:http(?:s)?:\\/\\/)?(?:www.)?(?:twitter.com)\\/(?!(?:${TWITTER_RESERVED_PATHS})(?:[\\'\\"\\?\\.\\/]|$))([a-z0-9_]{1,15})(?![a-z0-9_])(?:/)?`;

// eslint-disable-next-line max-len
const FACEBOOK_RESERVED_PATHS =
// eslint-disable-next-line max-len
'rsrc\\.php|apps|groups|events|l\\.php|friends|images|photo.php|chat|ajax|dyi|common|policies|login|recover|reg|help|security|messages|marketplace|pages|live|bookmarks|games|fundraisers|saved|gaming|salesgroups|jobs|people|ads|ad_campaign|weather|offers|recommendations|crisisresponse|onthisday|developers|settings|connect|business|plugins|intern|sharer';
// eslint-disable-next-line max-len
const FACEBOOK_REGEX_STRING = `(?<!\\w)(?:http(?:s)?:\\/\\/)?(?:www.)?(?:facebook.com|fb.com)\\/(?!(?:${FACEBOOK_RESERVED_PATHS})(?:[\\'\\"\\?\\.\\/]|$))(profile\\.php\\?id\\=[0-9]{3,20}|(?!profile\\.php)[a-z0-9\\.]{5,51})(?![a-z0-9\\.])(?:/)?`;

// eslint-disable-next-line max-len
const YOUTUBE_REGEX_STRING =
// eslint-disable-next-line max-len
'(?<!\\w)(?:https?:\\/\\/)?(?:youtu\\.be\\/|(?:www\\.|m\\.)?youtube\\.com(?:\\/(?:watch|v|embed|user|c(?:hannel)?)(?:\\.php)?)?(?:\\?[^ ]*v=|\\/))([a-zA-Z0-9\\-_]{2,100})';

// eslint-disable-next-line max-len
const TIKTOK_REGEX_STRING =
// eslint-disable-next-line max-len
'(?<!\\w)(?:http(?:s)?:\\/\\/)?(?:(?:www|m)\\.)?(?:tiktok\\.com)\\/(((?:(?:v|embed|trending)(?:\\?shareId=|\\/))[0-9]{2,50}(?![0-9]))|(?:@)[a-z0-9\\-_\\.]+((?:\\/video\\/)[0-9]{2,50}(?![0-9]))?)(?:\\/)?';

// eslint-disable-next-line max-len
const PINTEREST_REGEX_STRING =
// eslint-disable-next-line max-len
'(?<!\\w)(?:http(?:s)?:\\/\\/)?(?:(?:(?:(?:www\\.)?pinterest(?:\\.com|(?:\\.[a-z]{2}){1,2}))|(?:[a-z]{2})\\.pinterest\\.com)(?:\\/))((pin\\/[0-9]{2,50})|((?!pin)[a-z0-9\\-_\\.]+(\\/[a-z0-9\\-_\\.]+)?))(?:\\/)?';

// eslint-disable-next-line max-len
const DISCORD_REGEX_STRING =
// eslint-disable-next-line max-len
'(?<!\\w)(?:https?:\\/\\/)?(?:www\\.)?((?:(?:(?:canary|ptb).)?(?:discord|discordapp)\\.com\\/channels(?:\\/)[0-9]{2,50}(\\/[0-9]{2,50})*)|(?:(?:(?:canary|ptb).)?(?:discord\\.(?:com|me|li|gg|io)|discordapp\\.com)(?:\\/invite)?)\\/(?!channels)[a-z0-9\\-_]{2,50})(?:\\/)?';

/**
Expand Down
7 changes: 3 additions & 4 deletions test/core/crawlers/browser_crawler.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,6 @@ describe('BrowserCrawler', () => {
let isEvaluated = false;

const browserCrawler = new (class extends BrowserCrawlerTest {
// eslint-disable-next-line max-len
protected override async _navigationHandler(
ctx: PuppeteerCrawlingContext,
gotoOptions: PuppeteerGoToOptions,
Expand Down Expand Up @@ -292,7 +291,6 @@ describe('BrowserCrawler', () => {
});
let optionsGoto: PuppeteerGoToOptions;
const browserCrawler = new (class extends BrowserCrawlerTest {
// eslint-disable-next-line max-len
protected override async _navigationHandler(
ctx: PuppeteerCrawlingContext,
gotoOptions: PuppeteerGoToOptions,
Expand Down Expand Up @@ -941,9 +939,10 @@ describe('BrowserCrawler', () => {
await crawler.run([serverAddress]);

expect(spy).toBeCalled();
// eslint-disable-next-line max-len
expect(spy.mock.calls[0][0]).toEqual(
'When using RequestList and RequestQueue at the same time, you should instantiate both explicitly and provide them in the crawler options, to ensure correctly handled restarts of the crawler.',
'When using RequestList and RequestQueue at the same time, ' +
'you should instantiate both explicitly and provide them in the crawler options, ' +
'to ensure correctly handled restarts of the crawler.',
);
expect(spy.mock.calls[1][0]).toEqual(expect.stringContaining(proxyError));
});
Expand Down
4 changes: 2 additions & 2 deletions test/core/crawlers/cheerio_crawler.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -191,9 +191,9 @@ describe('CheerioCrawler', () => {
maxConcurrency: 2,
});

// eslint-disable-next-line max-len
await expect(cheerioCrawler.run()).rejects.toThrow(
"Route not found for label 'undefined'. You must set up a route for this label or a default route. Use `requestHandler`, `router.addHandler` or `router.addDefaultHandler`.",
"Route not found for label 'undefined'. You must set up a route for this label or a default route. " +
'Use `requestHandler`, `router.addHandler` or `router.addDefaultHandler`.',
);
});

Expand Down
2 changes: 1 addition & 1 deletion test/core/crawlers/puppeteer_crawler.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,8 @@ describe('PuppeteerCrawler', () => {

test('should throw if launchOptions.proxyUrl is supplied', async () => {
try {
// eslint-disable-next-line
new PuppeteerCrawler({
//eslint-disable-line
requestList,
maxRequestRetries: 0,
maxConcurrency: 1,
Expand Down
3 changes: 0 additions & 3 deletions test/core/error_tracker.test.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
/* eslint-disable no-multi-spaces */
import exp from 'node:constants';

import { ErrorTracker } from '../../packages/utils/src/internals/error_tracker';

const random = () => Math.random().toString(36).slice(2);
Expand Down
2 changes: 1 addition & 1 deletion test/core/puppeteer_request_interception.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ describe('utils.puppeteer.addInterceptRequestHandler|removeInterceptRequestHandl
// Override headers
const headers = {
...request.headers(),
accept: 'text/html',
'accept': 'text/html',
'accept-language': 'en-GB',
'upgrade-insecure-requests': '2',
};
Expand Down

0 comments on commit 75db005

Please sign in to comment.