Skip to content

Commit

Permalink
How to write a custom data extractor - blog post (#1038)
Browse files Browse the repository at this point in the history
* How to write a custom data extractor - blog post

* Added missing blog posts static content generator provider
  • Loading branch information
norberttech authored Apr 4, 2024
1 parent 4c883f0 commit e0438b4
Show file tree
Hide file tree
Showing 17 changed files with 454 additions and 0 deletions.
2 changes: 2 additions & 0 deletions web/landing/assets/controllers/syntax_highlight_controller.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import {Controller} from '@hotwired/stimulus';
import 'highlight.js/styles/github-dark.min.css';
import php from 'highlight.js/lib/languages/php';
import shell from 'highlight.js/lib/languages/shell';
import json from 'highlight.js/lib/languages/json';
import hljs from 'highlight.js/lib/core';

/* stimulusFetch: 'lazy' */
Expand All @@ -11,6 +12,7 @@ export default class extends Controller
{
hljs.registerLanguage('php', php);
hljs.registerLanguage('shell', shell);
hljs.registerLanguage('json', json);
}

connect()
Expand Down
3 changes: 3 additions & 0 deletions web/landing/assets/styles/app.css
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,6 @@ a {
@apply font-medium text-blue-600 dark:text-blue-500 hover:underline;
}

/* Render <code> text slightly smaller than the surrounding text (em is relative to the parent font size). */
code {
font-size: 0.9em;
}
3 changes: 3 additions & 0 deletions web/landing/importmap.php
Original file line number Diff line number Diff line change
Expand Up @@ -50,4 +50,7 @@
'highlight.js/lib/languages/shell' => [
'version' => '11.9.0',
],
'highlight.js/lib/languages/json' => [
'version' => '11.9.0',
],
];
24 changes: 24 additions & 0 deletions web/landing/src/Flow/Website/Controller/BlogController.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
<?php

declare(strict_types=1);

namespace Flow\Website\Controller;

use Symfony\Bundle\FrameworkBundle\Controller\AbstractController;
use Symfony\Component\HttpFoundation\Response;
use Symfony\Component\Routing\Attribute\Route;

/**
 * Renders blog posts from Twig templates located at blog/{date}/{slug}/post.html.twig.
 */
final class BlogController extends AbstractController
{
    /**
     * Renders a single blog post.
     *
     * Both route parameters end up inside a template path, so the route only
     * matches values that cannot contain path segments such as "..": a
     * YYYY-MM-DD date and a slug made of letters, digits, "_" and "-".
     * Anything else results in a 404 before this action is reached.
     *
     * @param string $date publication date segment of the URL, e.g. "2024-04-04"
     * @param string $slug post identifier segment of the URL
     *
     * @return Response rendered post; the template receives its own folder as "template_folder"
     */
    #[Route(
        '/blog/{date}/{slug}',
        name: 'blog_post',
        requirements: ['date' => '\d{4}-\d{2}-\d{2}', 'slug' => '[A-Za-z0-9_-]+'],
    )]
    public function post(string $date, string $slug) : Response
    {
        $templateFolder = 'blog/' . $date . '/' . $slug;

        return $this->render($templateFolder . '/post.html.twig', [
            'template_folder' => $templateFolder,
        ]);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
<?php

declare(strict_types=1);

namespace Flow\Website\StaticSourceProvider;

use NorbertTech\StaticContentGeneratorBundle\Content\{Source, SourceProvider};

/**
 * Source provider listing the blog posts as "blog_post" route sources.
 */
final class BlogPostsProvider implements SourceProvider
{
    public function __construct()
    {
    }

    /**
     * Returns one Source per blog post, each carrying the route parameters
     * (date and slug) of the "blog_post" route.
     *
     * @return array<int, Source>
     */
    public function all() : array
    {
        return [
            new Source('blog_post', ['date' => '2024-04-04', 'slug' => 'building-custom-extractor-google-analytics']),
        ];
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"name": string,
"account": string,
"displayName": string,
"propertySummaries": [
{
object (PropertySummary)
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
<?php

namespace Flow\ETL\Adapter\GoogleAnalytics;

use Flow\ETL\FlowContext;
use Google\Analytics\Admin\V1beta\AccountSummary;
use Google\Analytics\Admin\V1beta\AnalyticsAdminServiceClient;
use Google\Analytics\Admin\V1beta\PropertySummary;
use Flow\ETL\Extractor;
use Flow\ETL\Extractor\Limitable;
use Flow\ETL\Extractor\LimitableExtractor;

/**
 * Extractor skeleton for Google Analytics account summaries.
 *
 * Holds the Admin API client and the page size used when listing summaries;
 * the extraction itself is still to be written.
 */
final class AccountSummariesExtractor implements Extractor, LimitableExtractor
{
    use Limitable;

    /**
     * @param AnalyticsAdminServiceClient $client Admin API client used to list account summaries
     * @param int $pageSize number of summaries requested per page, from 1 to 200 inclusive
     *
     * @throws \Flow\ETL\Exception\InvalidArgumentException when $pageSize is outside the 1-200 range
     */
    public function __construct(
        private readonly AnalyticsAdminServiceClient $client,
        private readonly int $pageSize = 200
    ) {
        if ($this->pageSize < 1 || $this->pageSize > 200) {
            // 200 itself is accepted by the check above, so the message states the inclusive range.
            throw new \Flow\ETL\Exception\InvalidArgumentException('Page size must be between 1 and 200 (inclusive).');
        }
    }

    public function extract(FlowContext $context): \Generator
    {
        // TODO
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
<?php

namespace Flow\ETL\Adapter\GoogleAnalytics;

use Flow\ETL\Extractor\Signal;
use Flow\ETL\FlowContext;
use Google\Analytics\Admin\V1beta\AccountSummary;
use Google\Analytics\Admin\V1beta\AnalyticsAdminServiceClient;
use Google\Analytics\Admin\V1beta\PropertySummary;
use Flow\ETL\Extractor;
use Flow\ETL\Extractor\Limitable;
use Flow\ETL\Extractor\LimitableExtractor;
use function Flow\ETL\DSL\rows;

/**
 * Extractor that yields Google Analytics account summaries as rows.
 */
final class AccountSummariesExtractor implements Extractor, LimitableExtractor
{
// code from previous snippet

/**
 * Lists account summaries using the configured page size and yields each one,
 * converted by ga_account_summary_to_row() and wrapped in rows().
 *
 * Extraction ends early when Signal::STOP is received from the consumer or
 * when reachedLimit() reports true.
 */
public function extract(FlowContext $context): \Generator
{
$list = $this->client->listAccountSummaries(['pageSize' => $this->pageSize]);

/** @var AccountSummary $accountSummary */
foreach ($list->iterateAllElements() as $accountSummary) {
// Whatever the consumer sends back into the generator is captured in $signal.
$signal = yield rows(ga_account_summary_to_row($accountSummary));
$this->countRow();

if ($signal === Signal::STOP || $this->reachedLimit()) {
return;
}
}

// TODO: Implement pagination
// NOTE(review): iterateAllElements() presumably already follows next-page tokens - confirm against the GAX documentation before adding a manual paging loop.
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
<?php

namespace Flow\ETL\Adapter\GoogleAnalytics;

use Flow\ETL\Extractor\Signal;
use Flow\ETL\FlowContext;
use Google\Analytics\Admin\V1beta\AccountSummary;
use Google\Analytics\Admin\V1beta\AnalyticsAdminServiceClient;
use Google\Analytics\Admin\V1beta\PropertySummary;
use Flow\ETL\Extractor;
use Flow\ETL\Extractor\Limitable;
use Flow\ETL\Extractor\LimitableExtractor;
use function Flow\ETL\DSL\rows;

/**
 * Extractor that yields Google Analytics account summaries as rows, page by page.
 */
final class AccountSummariesExtractor implements Extractor, LimitableExtractor
{
    // code from previous snippet

    /**
     * Lists account summaries and yields each one as a single-row batch.
     *
     * Pages are walked exactly once through iteratePages(). Combining
     * iterateAllElements() (which already follows next-page tokens) with a
     * manual page-token loop would request later pages again and yield
     * duplicated rows.
     *
     * Extraction ends early when Signal::STOP is received from the consumer or
     * when reachedLimit() reports true; no further page is requested then.
     */
    public function extract(FlowContext $context): \Generator
    {
        $list = $this->client->listAccountSummaries(['pageSize' => $this->pageSize]);

        // Following pages are requested lazily, only when the loop advances to them.
        foreach ($list->iteratePages() as $page) {
            /** @var AccountSummary $accountSummary */
            foreach ($page as $accountSummary) {
                $signal = yield rows(ga_account_summary_to_row($accountSummary));
                $this->countRow();

                if ($signal === Signal::STOP || $this->reachedLimit()) {
                    return;
                }
            }
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
<?php

namespace Flow\ETL\Adapter\GoogleAnalytics;

use Flow\ETL\Extractor\Signal;
use Flow\ETL\FlowContext;
use Google\Analytics\Admin\V1beta\AccountSummary;
use Google\Analytics\Admin\V1beta\AnalyticsAdminServiceClient;
use Google\Analytics\Admin\V1beta\PropertySummary;
use Flow\ETL\Extractor;
use Flow\ETL\Extractor\Limitable;
use Flow\ETL\Extractor\LimitableExtractor;
use function Flow\ETL\DSL\rows;

/**
 * Extractor that yields Google Analytics account summaries as rows.
 *
 * Every account summary returned by the Admin API becomes a single-row batch
 * built by ga_account_summary_to_row().
 */
final class AccountSummariesExtractor implements Extractor, LimitableExtractor
{
    use Limitable;

    /**
     * @param AnalyticsAdminServiceClient $client Admin API client used to list account summaries
     * @param int $pageSize number of summaries requested per page, from 1 to 200 inclusive
     *
     * @throws \Flow\ETL\Exception\InvalidArgumentException when $pageSize is outside the 1-200 range
     */
    public function __construct(
        private readonly AnalyticsAdminServiceClient $client,
        private readonly int $pageSize = 200
    ) {
        if ($this->pageSize < 1 || $this->pageSize > 200) {
            // 200 itself is accepted by the check above, so the message states the inclusive range.
            throw new \Flow\ETL\Exception\InvalidArgumentException('Page size must be between 1 and 200 (inclusive).');
        }
    }

    /**
     * Lists account summaries and yields each one as a single-row batch.
     *
     * Pages are walked exactly once through iteratePages(). The previous
     * combination of iterateAllElements() (which already follows next-page
     * tokens) and a manual page-token loop requested later pages again and
     * yielded duplicated rows.
     *
     * Extraction ends early when Signal::STOP is received from the consumer or
     * when reachedLimit() reports true; no further page is requested then.
     */
    public function extract(FlowContext $context): \Generator
    {
        $list = $this->client->listAccountSummaries(['pageSize' => $this->pageSize]);

        // Following pages are requested lazily, only when the loop advances to them.
        foreach ($list->iteratePages() as $page) {
            /** @var AccountSummary $accountSummary */
            foreach ($page as $accountSummary) {
                $signal = yield rows(ga_account_summary_to_row($accountSummary));
                $this->countRow();

                if ($signal === Signal::STOP || $this->reachedLimit()) {
                    return;
                }
            }
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
<?php

use Flow\ETL\Adapter\GoogleAnalytics\AccountSummariesExtractor;
use Google\Analytics\Admin\V1beta\AnalyticsAdminServiceClient;

/**
 * DSL helper creating an extractor of Google Analytics account summaries.
 *
 * @param AnalyticsAdminServiceClient $client Admin API client handed to the extractor
 * @param int $page_size page size handed to the extractor (defaults to 200)
 *
 * @return AccountSummariesExtractor
 */
function from_ga_account_summaries(AnalyticsAdminServiceClient $client, int $page_size = 200) : AccountSummariesExtractor
{
    // Instantiate the imported class that matches the declared return type.
    return new AccountSummariesExtractor($client, $page_size);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
<?php

namespace Flow\ETL\Adapter\GoogleAnalytics;

use Flow\ETL\Row;
use Google\Analytics\Admin\V1beta\AccountSummary;
use Google\Analytics\Admin\V1beta\AnalyticsAdminServiceClient;
use Google\Analytics\Admin\V1beta\PropertySummary;
use function Flow\ETL\DSL\{list_entry, row, str_entry, structure_element, type_integer, type_list, type_string, type_structure};

/**
 * Converts a Google Analytics account summary into a Flow row.
 *
 * The row has three string entries (account, name, displayName) and one list
 * entry, propertySummaries, holding a structure per property summary with the
 * keys property, displayName, propertyType and parent.
 *
 * @param AccountSummary $accountSummary summary to convert
 *
 * @return Row
 */
function ga_account_summary_to_row(AccountSummary $accountSummary) : Row
{
    // Declared shape of a single element of the propertySummaries list.
    $propertySummaryType = type_structure(
        [
            structure_element('property', type_string()),
            structure_element('displayName', type_string()),
            structure_element('propertyType', type_integer()),
            structure_element('parent', type_string()),
        ]
    );

    $toStructure = static fn(PropertySummary $summary) => [
        'property' => $summary->getProperty(),
        'displayName' => $summary->getDisplayName(),
        'propertyType' => $summary->getPropertyType(),
        'parent' => $summary->getParent(),
    ];

    // The summaries are turned into a plain array first so array_map() can process them.
    $propertySummaries = array_map(
        $toStructure,
        \iterator_to_array($accountSummary->getPropertySummaries())
    );

    return row(
        str_entry('account', $accountSummary->getAccount()),
        str_entry('name', $accountSummary->getName()),
        str_entry('displayName', $accountSummary->getDisplayName()),
        list_entry('propertySummaries', $propertySummaries, type_list($propertySummaryType)),
    );
}
Loading

0 comments on commit e0438b4

Please sign in to comment.