bigquerystorage/quickstart.php (65 lines of code) (raw):

<?php
/**
 * Copyright 2023 Google LLC
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

# [START bigquerystorage_quickstart]
// Load the Composer autoloader so the client library classes can be resolved.
require __DIR__ . '/vendor/autoload.php';

use Google\Cloud\BigQuery\Storage\V1\BigQueryReadClient;
use Google\Cloud\BigQuery\Storage\V1\DataFormat;
use Google\Cloud\BigQuery\Storage\V1\ReadSession;
use Google\Cloud\BigQuery\Storage\V1\ReadSession\TableModifiers;
use Google\Cloud\BigQuery\Storage\V1\ReadSession\TableReadOptions;
use Google\Protobuf\Timestamp;

// Create the read client and format the resource name of the project.
// Replace YOUR_PROJECT_ID with your own project ID before running.
$client = new BigQueryReadClient();
$project = $client->projectName('YOUR_PROJECT_ID');

// Snapshot time placeholder. The code further down interprets this value as
// milliseconds (it derives seconds by dividing by 1000).
$snapshotMillis = 'YOUR_SNAPSHOT_MILLIS';

// The table to read: baby name data from a public dataset.
$table = $client->tableName('bigquery-public-data', 'usa_names', 'usa_1910_current');

// Describe the read session. Rows are requested in Apache Avro format here;
// the API can also deliver data serialized as Apache Arrow.
$readSession = new ReadSession();
$readSession->setTable($table);
$readSession->setDataFormat(DataFormat::AVRO);

// We limit the output columns to a subset of those allowed in the table,
// and set a simple filter to only report names from the state of
// Washington (WA).
$readOptions = new TableReadOptions();
$readOptions->setSelectedFields(['name', 'number', 'state']);
$readOptions->setRowRestriction('state = "WA"');
$readSession->setReadOptions($readOptions);

// With snapshot millis if present: read the table as of that point in time.
// The value is split into whole seconds plus the sub-second remainder
// expressed in nanoseconds, which is what Timestamp stores.
if (!empty($snapshotMillis)) {
    // Fail with a clear message (rather than an arithmetic TypeError) when the
    // value is not a number, e.g. when the placeholder was never replaced.
    if (!is_numeric($snapshotMillis)) {
        throw new InvalidArgumentException(
            'Snapshot millis must be numeric, got: ' . var_export($snapshotMillis, true)
        );
    }
    $millis = (int) $snapshotMillis;

    $timestamp = new Timestamp();
    // intdiv() keeps the seconds an integer; the "/" operator would produce a
    // float whenever the value is not an exact multiple of 1000.
    $timestamp->setSeconds(intdiv($millis, 1000));
    $timestamp->setNanos(($millis % 1000) * 1000000);

    $tableModifier = new TableModifiers();
    $tableModifier->setSnapshotTime($timestamp);
    $readSession->setTableModifiers($tableModifier);
}

try {
    $session = $client->createReadSession(
        $project,
        $readSession,
        [
            // We'll use only a single stream for reading data from the table.
            // However, if you wanted to fan out multiple readers you could do so
            // by having a reader process each individual stream.
            'maxStreamCount' => 1
        ]
    );

    // Guard the index below: report a clear error if the session carries no
    // streams instead of failing on a missing element 0.
    $streams = $session->getStreams();
    if (count($streams) === 0) {
        throw new RuntimeException('The read session contains no streams to read from.');
    }
    $stream = $client->readRows($streams[0]->getName());

    // Do any local processing by iterating over the responses. The
    // google-cloud-bigquery-storage client reconnects to the API after any
    // transient network errors or timeouts.
    $datumReader = null;
    $names = [];
    $states = [];
    foreach ($stream->readAll() as $response) {
        // Parse the Avro schema only when a response actually carries one and
        // reuse the resulting reader for the responses that follow, instead of
        // re-parsing the same schema for every response.
        if ($response->hasAvroSchema()) {
            $datumReader = new AvroIODatumReader(
                AvroSchema::parse($response->getAvroSchema()->getSchema())
            );
        }
        if ($datumReader === null) {
            throw new RuntimeException('Received row data before any Avro schema.');
        }

        // Decode every record packed into this response's binary row block.
        $readIO = new AvroStringIO($response->getAvroRows()->getSerializedBinaryRows());
        $decoder = new AvroIOBinaryDecoder($readIO);
        while (!$readIO->is_eof()) {
            $record = $datumReader->read($decoder);
            // Array keys act as sets: only the distinct values are kept.
            $names[$record['name']] = '';
            $states[$record['state']] = '';
        }
    }
    $states = array_keys($states);
    printf(
        'Got %d unique names in states: %s' . PHP_EOL,
        count($names),
        implode(', ', $states)
    );
} finally {
    // Always release the client, even if session creation or reading failed.
    $client->close();
}
# [END bigquerystorage_quickstart]