php/googleClientLib/godrList-callback.php

<?php

/*  ------------ documentation
        see https://developers.google.com/identity/protocols/oauth2/web-server 
    ----------- installation
    download latest release, select zip with appropriate release and expand
        https://github.com/googleapis/google-api-php-client/releases
        https://github.com/googleapis/google-api-php-client-services/releases
*/


/**
 * Abort the current operation by throwing an Error.
 *
 * @param mixed $m text describing the failure; it is appended to the prefix "error: "
 * @throws Error always
 */
function err($m) {
    $message = 'error: ' . $m;
    throw new Error($message);
}

require '/wkData/install/google-api-client/vendor/autoload.php';
require '/wkData/install/google-api-services/autoload.php';
// include_once __DIR__ . '/google-api/templates/base.php';

// true when this script runs under the command-line SAPI, false when it is served for a web request
define('CLI', php_sapi_name() === 'cli');
// OAuth2 scopes requested from google; goClient() hands them to the client with setScopes()
const SCOPES = 
            [ 'https://www.googleapis.com/auth/drive.metadata.readonly' # drive readonly
            , 'https://www.googleapis.com/auth/drive'                   # drive file update
            , 'https://www.googleapis.com/auth/documents.readonly'      # docs readonly
            #, 'https://www.googleapis.com/auth/documents'               # docs readWrite
            ];
const KYPR = '/wkData/wk/extr2/'; # path prefix for locally stored keys (client key and resource owner token files, see goClient)
// RRURL: under CLI a fixed callback on localhost, otherwise the URL of the currently running script,
// rebuilt from the request scheme, host and script path in $_SERVER
define('RRURL',  CLI ? 'http://localhost/home/inf/php/goauth2callback.php' # redirect URL, authorization server sends token to this URL
                     : "$_SERVER[REQUEST_SCHEME]://$_SERVER[HTTP_HOST]$_SERVER[PHP_SELF]") ;
       # , RRURL = 'http://localhost/home/inf/php/e05GetServer.php' 
const RRCPA = '/wkData/pub/oauth2.log'; # redirect URL writes code to this path; goauthCodeCli() polls and reads this file
; # empty statement, has no effect

# echo "rrurl '" . RRURL . "' $_SERVER[REQUEST_SCHEME]<br>";
// web requests only: start the session and, if this request is the redirect coming back from google,
// restore the original request parameters (see goauthSessionStart)
if (! CLI)
    goauthSessionStart();
# echo "start " . __FILE__ . "\n";

// NOTE(review): inside single quotes \t is a backslash followed by the letter t, not a tab character;
// presumably this is meant as a date() format string - confirm the intended separator
const DTLFMT = 'Y-m-d\tH:i:s';

/**
 * Start the PHP session and, when this request is the OAuth2 redirect coming
 * back from google, restore the request parameters saved before the redirect.
 *
 * A request is treated as such a redirect when both 'code' and 'state' are
 * present in $_GET. The returned state must then equal the state that
 * goauthCodeWeb() stored in the session. On success google's parameters are
 * moved to the global $goauthGet and $_GET is replaced by the parameters of
 * the original request.
 *
 * @return void
 * @throws Error (through err) when the state is missing from the session or does not match
 */
function goauthSessionStart() {
    global $goauthGet;
    session_start();
    if (! isset($_GET['code'], $_GET['state'])) {
        # ordinary request, not a redirect from google OAUTH2
        return;
    }
    $expected = $_SESSION['goauthOriginalState'] ?? null;
    $received = $_GET['state'];
    # a missing session state must never match: the former '?' default let a request with state=? pass.
    # is_string rejects array input (state[]=x), hash_equals compares in constant time
    if (! is_string($expected) || ! is_string($received) || ! hash_equals($expected, $received))
        err('goautSessionStart state mismatch from google OAUTH2');
    $goauthGet = $_GET;
    # state and original parameters are stored together by goauthCodeWeb, so the fallback is only a safety net
    $_GET = $_SESSION['goauthOriginalGet'] ?? [];
    # print_r needs the return flag inside echo, otherwise it prints first and echo appends "1";
    # the dumps contain request data, so they are escaped for html
    $flags = ENT_QUOTES | ENT_SUBSTITUTE;
    echo "<br>\$_goauthGet after swap " . htmlspecialchars(print_r($goauthGet, true), $flags, 'UTF-8');
    echo "<br>\$_GET after swap " . htmlspecialchars(print_r($_GET, true), $flags, 'UTF-8');
    unset($_SESSION['goauthOriginalGet'], $_SESSION['goauthOriginalState']);
}

/**
 * Obtain the OAuth2 authorization code while serving a web request.
 *
 * Two cases:
 *  - the request is the redirect back from google: goauthSessionStart() has
 *    put google's parameters into the global $goauthGet, and the code found
 *    there is returned
 *  - otherwise a random state is generated, the state and the current $_GET
 *    are saved in the session, the browser is redirected to google's
 *    authorization URL and the script ends. Google later redirects the
 *    browser to RRURL, which starts this script again.
 *
 * @param object $client google client; setState, setRedirectUri and createAuthUrl are called on it
 * @return mixed the authorization code (only in the first case; the second case never returns)
 * @throws Error (through err) when the redirect header can no longer be sent
 */
function goauthCodeWeb($client) {
    global $goauthGet;
    if (isset($goauthGet['code'])) {
        # the code arrived as a request parameter: escape it before writing it into the page
        echo 'returniong code ' . htmlspecialchars((string) $goauthGet['code'], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . '<br>';
        return $goauthGet['code'];
    }
    $state = base64_encode(random_bytes(16));
    $client->setState($state);
    $client->setRedirectUri(RRURL);
    $authUrl = $client->createAuthUrl();
    # header() fails once output has started; report that instead of exiting without a redirect
    if (headers_sent($file, $line))
        err("goauthCodeWeb cannot redirect to google OAUTH2, output already started at $file:$line");
    $_SESSION['goauthOriginalState'] = $state;
    $_SESSION['goauthOriginalGet'] = $_GET;
    header("location: $authUrl");
    exit();
}


/**
 * Build and return an authorized google client.
 *
 * The client keys are read from KYPR . "goCli<cliN>Key.json". A previously
 * stored resource owner token is read from KYPR . "goRor<rorN>Tkn.json".
 * When the access token is missing or expired it is refreshed with the stored
 * refresh token, or - if there is none - a new authorization code is requested
 * from the user (goauthCodeCli / goauthCodeWeb) and exchanged for a token.
 * A new token is written back to the token file.
 *
 * NOTE(review): the token contents are echoed for debugging; in web mode this
 * shows the stored tokens to whoever requests the page - consider removing.
 *
 * @param string $cliN client name, i.e. the application/project as registered at google
 *                     at Konto ...Drittanbieter-Apps und -Dienste: https://myaccount.google.com/connections
 * @param string $rorN resource owner name, i.e. the user owning the drive
 * @return Google_Client client carrying a valid access token
 * @throws Error (through err) when the token file is unreadable, google answers with an error,
 *               or the new token cannot be written
 */
function goClient($cliN, $rorN) {
    $cliPa  = KYPR . "goCli{$cliN}Key.json"; // path to client keys
    $rorPa  = KYPR . "goRor{$rorN}Tkn.json"; // path to resource owner tokens

    $client = new Google_Client();
    $client->setAuthConfig($cliPa);
    $client->setScopes(SCOPES);
    $client->setAccessType('offline'); // offline access makes google issue a refresh token as well

    if (file_exists($rorPa)) { // read previously obtained accessToken
        $accessToken = json_decode(file_get_contents($rorPa), true);
        if (! is_array($accessToken))
            err("resource owner token file $rorPa does not contain a json object");
        echo "--- read resource owner token json from $rorPa\n" . print_r($accessToken, true) . "\n";
        $client->setAccessToken($accessToken);
    }
    if (! $client->isAccessTokenExpired())
        return $client;

    // Refresh the token if possible, else fetch a new one.
    $refreshToken = $client->getRefreshToken();
    $refreshed = (bool) $refreshToken;
    if ($refreshed) {
        // really ask google for a new access token; reading the expired one back
        // with getAccessToken() would only store the expired token again
        $accessToken = $client->fetchAccessTokenWithRefreshToken($refreshToken);
    } else {
        // Request authorization from the user using google authorization.
        $code = CLI ? goauthCodeCli($client) : goauthCodeWeb($client);
        // in web mode the code came in as a request parameter, so escape it for the page
        $shown = CLI ? $code : htmlspecialchars((string) $code, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
        echo "got code $shown<br>";
        // Exchange authorization code for an access token.
        $accessToken = $client->fetchAccessTokenWithAuthCode($code);
        echo "got resource owner access token " . print_r($accessToken, true) . "\n";
    }
    if (! is_array($accessToken) || array_key_exists('error', $accessToken))
        err("accesstoken " . print_r($accessToken, true));
    if ($refreshed)
        echo "refreshed ok\n";
    $client->setAccessToken($accessToken);

    // write new access token to $rorPa
    if (! file_exists(dirname($rorPa))) {
        mkdir(dirname($rorPa), 0700, true);
    }
    if (file_put_contents($rorPa, json_encode($client->getAccessToken())) === false)
        err("could not write resource owner token to $rorPa");
    echo "written new resource owner token to $rorPa\n";
    return $client;
}

/**
 * Obtain the OAuth2 authorization code when running from the command line.
 *
 * A web browser is opened (xdg-open) on google's authorization URL. Google
 * sends the answer to the redirect URL RRURL; it seems that a URI on localhost
 * works, but not one on a different server. The script behind RRURL writes
 * the received parameters into the file RRCPA. This function polls once per
 * second until that file exists, reads it, and checks the returned state
 * against the one it generated.
 *
 * The response file must contain the lines state=..., code=... and scope=...,
 * each preceded and followed by a line break.
 *
 * @param object $client google client; setState, setRedirectUri and createAuthUrl are called on it
 * @return string the authorization code
 * @throws Error (through err) when state, code or scope is missing or the state does not match
 */
function goauthCodeCli($client) {
    $state = base64_encode(random_bytes(16));
    $client->setState($state);
    $client->setRedirectUri(RRURL);
    if (is_file(RRCPA))
        unlink(RRCPA);              # drop a stale answer; the guard avoids a warning when there is none
    $authUrl = $client->createAuthUrl();
    system('xdg-open ' . escapeshellarg($authUrl));  # quote the url for the shell
    echo "opening google authorization: xdg-open $authUrl";
    do {
        sleep(1);
        echo "waiting for you to give google authorization in browser\n";            
    } while(! is_file(RRCPA));
    $codeS = file_get_contents(RRCPA);
    # err() throws, so each check is only reached when the previous ones passed
    if (! preg_match('/\Rstate=([^\n\r]*)\R/', $codeS, $ma))
        err("state not found in response file $codeS");
    if ($ma[1] !== $state)
        err("state mismatch got $ma[1] not as expected $state");
    if (! preg_match('/\Rcode=([^\n\r]*)\R/', $codeS, $mc))
        err("code not found in response file $codeS");
    if (! preg_match('/\Rscope=([^\n\r]*)\R/', $codeS, $ms))
        err("scope not found in response file $codeS");
    echo "found code $mc[1] scope $ms[1]\n";
    unlink(RRCPA);
    return $mc[1];
}



/**
 * Walk a google drive folder recursively, depth first, ordered by name.
 *
 * For every entry directly in folder $pa the callback $one is called with the
 * file object and its path. Entries with the folder mime type are then
 * descended into before the next sibling is handled. All result pages of a
 * folder are fetched by following nextPageToken.
 *
 * @param object   $srv      drive service; $srv->files->listFiles($opts) is called on it
 * @param string   $pa       id of the folder to list ('root' for the drive root)
 * @param string   $pPa      path prefix prepended to each entry name ('' at the top, otherwise ending in '/')
 * @param callable $one      called as $one($file, $path) for every entry
 * @param int      $pageSize number of entries requested per listFiles call (default 10, as before)
 * @return void
 */
function goDrList($srv, $pa, $pPa, $one, $pageSize = 10) {
    $opts = [ 'pageSize' => $pageSize
            , 'fields' => 'nextPageToken, files(id,name,mimeType)'
            , 'q' => '"' . $pa . '" in parents'
            , 'orderBy' => 'name'
            ];
    do {
        $results = $srv->files->listFiles($opts);
        foreach ($results->getFiles() as $f) {
            $p = $pPa . $f->getName();
            $one($f, $p);
            if ($f->getMimeType() === 'application/vnd.google-apps.folder')
                goDrList($srv, $f->getId(), "$p/", $one, $pageSize);
        }
        $nx = $results->getNextPageToken();
        $opts['pageToken'] = $nx;
        # stop on a missing token; an empty string is treated the same way so that it can never loop forever
    } while ($nx !== null && $nx !== '');
}
// ---- driver: list the first entries of the drive of resource owner 'Wlklxy' using client 'Qui'
$c = goClient('Qui', 'Wlklxy');
$drv = new Google\Service\Drive($c);
$ix = 0;
goDrList($drv, 'root', '', function ($f, $p) use (&$ix) {
    echo ++$ix . ": $p, n={$f->getName()}, id={$f->getId()}, type={$f->getMimeType()} \n";
    // goDrList offers no way to stop early, so the walk is aborted by throwing after 21 entries
    if ($ix > 20)
        err('ende stop');
});
// a complete walk is a success: report it instead of throwing an Error.
// The statements that used to follow the unconditional err('ende gut') could never run
// (and passed query options to the getter getFiles()), so they are removed.
echo "ende gut\n";



/* ----------------------------------------------------------------py
    goffid.py: Google Files Folders Ids utilities

    goFfId.py [-h] [-n name] [-p parent] cert fun plus*

    cert: the google certificate to use, hint currently inuse: a=admin@spWallisellen.ch f=fiwiko@wlkl.ch w=wa@wlkl.ch 
    
    fun: the function to perform

    Migration: replace foreign files by copies of root owner. uses 3 files
        * id: files in drive, including current googleDriveId and originId (if replaced):  goffid-<cert>.csv in driveRoot
        * act: contains changes since last id, plus proposed migration actions :           goffid-<cert>-act.csv in local directory
        * mig: files in migration process with current/future and origin ids:              goffid-<cert>-mig.csv in driveRoot

        The different steps
        * migAct: analyse drive list, id, mig and produce act, with migration proposals
        * manually: check/modify act
        * migMig: for the files in act with migration action, 
            create copies, for folders empty folders in zFremdeOrigiTemp 
                (if not already created and registered in mig)
            and append them to mig, after removing entries in mig, that are already in id
        * migSwap: for each file/folder in mig
            move the new copy from zFremdeOrigiTemp to destination folder
            for directories, move the contained elements to new folder
            move the origin to zFremdeOriginale
        * migDoc update the links in all documents (origin -> id from id and mig (if swapped)
        * migId: analyse drive list, id, mig and upload a new version of id, with
            updated list of origins
            changes since old version of id
        * mig2 steps migMig to migId in sequence

    chown user: change the owner to user 
        of files owned by cert and residing in folders owned by by user, if possible

    l
    list query: google q= ist joined from plus. attention for shell quoting, use e.g.
            ./goffid.py w list 'mimeType="application/vnd.google-apps.folder"' and "'root'" in parents


    up filename mimetype modifiedTime?; upload filename to drive 
        with name -n (default filename) in folder -p (default root) with the given modifiedTime

    updDoc docid+: update the links in the docs with the given ids, using id and mig for the mapping origin -> new

8. 4.23 moved script to pc/bin, moved authorizations to /wkData/wk/extr2
"""


# from __future__ import print_function
import pickle
import os.path
import csv
from datetime import datetime
import time
from io import BytesIO
from io import StringIO

def err(*m):
    print('\n*****\nerror in', __file__ + ':', *m)
    x = 1 / 0

def extendReverse(l, r):
    for i in range(len(r)-1, -1, -1):  
        l.append(r[i])      

def csvWrite(fn, li, flds):
    """ write a list of dictionary to a csv file
        fn filename to write to
        li the list of dictionaries to write
        flds a string with the fieldnames to write (separated by ' ' or ','
    """
    with open(fn, 'w', newline='') as fi:
        wr = csv.DictWriter(fi, fieldnames=flds.replace(',', ' ').split())
        wr.writeheader()
        wr.writerows(li)
    print('written', len(li), 'rows to', fn)

def csvReadFi(fi, close=False):
    r = list(csv.DictReader(fi))
    if close:
        fi.close()
    return r
 
def csvRead(fn):
    with open(fn, newline='') as f:
        res = csvReadFi(f)
    print('csvRead', fn, len(res))
    return res

#####go: google tiny helpers

gNow = datetime.now().timestamp()   # seconds since epoch
gDay = 86400                        # seconds in a day
def goTst(s):                       # google date time string to timstamp
    if s[-1] == 'Z':
        return datetime.fromisoformat(s[0:-1]).timestamp()
    else:
        err(f'goTst {s} bad timestamp format')
    #    return datetime.fromisoformat('2021-07-11T12:16:47.774+00:00').timestamp()

#####go: google credentials and discovery

from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
from googleapiclient.http import MediaFileUpload
from googleapiclient.http import MediaIoBaseDownload
    
def goCredentials(crt):
    if type(crt) != str or len(crt) < 1:
        err(f"bad certification name: {crt} {type(crt)}")
    """ load the credentials from google OAuth2, 
            see Google APPI Console https://console.developers.google.com/?authuser=0&project=quickstart-1611556606696 
    """
    aPr = '/wkData/wk/extr2/'
    # If modifying these scopes, delete the file token.pickle.
    SCOPES =[ 'https://www.googleapis.com/auth/drive.metadata.readonly' # drive readonly
            , 'https://www.googleapis.com/auth/drive'                   # drive file update
            , 'https://www.googleapis.com/auth/documents.readonly'      # docs readonly
            #, 'https://www.googleapis.com/auth/documents'               # docs readWrite
            ]
    tokPiFi = f'{aPr}googleToken-{crt}.pickle'                                # the file to cache authorization
    credJson = f'{aPr}googleAPIcredentials.json'       # the credentials file generated by google API

    creds = None
    # The file token.pickle stores the user's access and refresh tokens, and is
    # created automatically when the authorization flow completes for the first
    # time.
    if os.path.exists(tokPiFi):
        with open(tokPiFi, 'rb') as token:
            creds = pickle.load(token)
    # If there are no (valid) credentials available, let the user log in.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(credJson, SCOPES)
            creds = flow.run_local_server(port=0)
        # Save the credentials for the next run
        with open(tokPiFi, 'wb') as token:
            pickle.dump(creds, token)
    return creds

#####dr: google drive

# our standard fields for file (dicts)
drFlds1 = "name, id, mimeType, createdTime, modifiedTime, trashed"
drFldsAPI = drFlds1 + ", parents, owners(emailAddress)"
drFldsL = drFlds1 + ", parent owner"
drFldsO = drFldsL + ", lv path"
drFldsP = drFldsO + ", origin change act"
drMimeFolder = 'application/vnd.google-apps.folder'

def pathSort(l):
    """ sort the list of file dicts by path """
    l.sort(key=lambda f: f['path'])
    return l

def driveBuild(cert):
    global driveSrv
    driveSrv = build('drive', 'v3', credentials=goCredentials(cert))

def drInfo():
    """ print the drive info """
    global drUser, drRoot
    # Call the Drive v3 API
    ab = driveSrv.about().get(fields='user(displayName, emailAddress, kind), storageQuota').execute()
    # print(ab)   
    drUser = ab['user']['emailAddress']
    drRoot = drGet('root')
    #print('root', drRoot)
    print(ab['user']['kind'], ab['user']['displayName'], drUser
            , '; storage (MB) usage', round(int(ab['storageQuota']['usage'])/1048576), ', trash', round(int(ab['storageQuota']['usageInDriveTrash'])/1048576)
                       , ', limit', round(int(ab['storageQuota']['limit'])/1048576), 'root=', drRoot['name'], drRoot['id'])

def drFiAtts(f):
    """ return the file dict f after default massage of attributes """
    if 'parents' in f:      # add the additional fields
        if len(f['parents']) != 1:
            err('parents in', f);
        f['parent'] = f['parents'][0]
        del f['parents']
    if 'owners' in f:
        if len(f['owners']) != 1 or 'emailAddress' not in f['owners'][0]:
            err('owners in', f);
        f['owner'] =  f['owners'][0]['emailAddress']
        del f['owners']
    return f

def drGet(fid, fFlds = drFldsAPI): 
    """ get the file dict for the given id """ 
    return drFiAtts(driveSrv.files().get(fileId=fid,fields=fFlds).execute())

def drList(foId, fFlds = drFldsAPI, orderBy=None):  
    """ return a list of files and folders directly in the google drive folder foId 
        each file or folder as a dict with the given fields 
    """
    ff = []
    q = foId[2:] if foId[0:2] == 'q=' else '' if foId == '' else f"'{foId}' in parents"
    next = None
    while True: 
        res = driveSrv.files().list(
            pageSize=500, pageToken=next, fields=f"nextPageToken, files({fFlds})"
                , q=q, orderBy=orderBy
                ).execute()
        # print(f'list found {len(res)}: {res}');
        ac = res.get('files', [])
        for a in ac:
            drFiAtts(a)
        ff += ac
        next = res.get('nextPageToken')
        if next == None:
            return ff

def drListOne(pa, nm, retNone=False):
    """ return file dict for a filename in a parent folder
        return None if it does not exist
        error if multiple files or thrashed
    """
    res = drList(f"q=name='{nm}' and '{pa}' in parents")
    if len(res) == 1 and not res[0]['trashed']:
        return drFiAtts(res[0])
    elif len(res) == 0:
        return None if retNone else err(f'drListOne: no file {pa}/{nm}')
    else:
        err(f'drListOne: file {pa}/{nm} multiple or thrashed', res) 

def drWalk(af,fFlds = drFldsAPI, stop=None):
    """ iterate over all files from one or several folders in a google drive recursively, depth first
        af can be
            id of folder
            a list of id's of folders
            a dictionary of folderId => path root name
        yields drWalkLi
    """

    at = type(af)
    ff = []
    for i, pa in (af if at == dict else {af: ''} if at == str else {i: '' for i in af}).items(): # arg af to dict
        f = drGet(i, fFlds)
        f['path'] = pa
        f['lv'] = 0
        ff.append(f)
    yield from drWalkLi(ff, fFlds, stop)

def drWalkLi(ff, fFlds = drFldsAPI, stop=None):
    """ iterate over all files in list ff recursively. 
        
        the elements of ff must be a dict with fields fFlds after drFiAtts plus path
        yields a dict of the fieldnames given by fFlds (after drFiAtts), plus
            'lv' the level within the filetree, starting with 0 in the folders in af
    """
    doneOrQueued = set() if stop==None else stop
    print('drWalkLi(', [f['name'] for f in ff], fFlds, stop, ')')
    stck = []
    extendReverse(stck, ff)             # push the start list to the work stack
                                        # for lists only append and pop from the end are efficient
    cF = cT = 0
    while len(stck) > 0: 
        f = stck.pop()                  # pop next item from stck
        if f['id'] in doneOrQueued:
            print(f"--drWalkLi already doneOrQueued {f['path']} {f['lv']} {f['mimeType']} {f['id']}")
        else:
            doneOrQueued.add(f['id'])
            yield f    
            cT += 1
            if f['mimeType'] == drMimeFolder:
                cF += 1
                ch = drList(f['id'], fFlds, 'name')    # get contents of this folder
                lv = f['lv'] + 1
                pp = f['path'] + '/' if f['path'] != '' else ''
                for c in ch:
                    c['lv'] = lv 
                    c['path'] = pp + c['name']
                extendReverse(stck, ch)                # push the children to the work stack
                print(f'--drWalkLi {cF}/{cT} stack {len(stck)} level {f["lv"]} beginning {f["path"]}')
    print(f'--drWalkLi {cF}/{cT} end of iterator')

def drCopy(id, pa, fn):
    """ copy google drive file with id into folder pa with new name nm and return id of new copy """
    # gen = driveSrv.files().generateIds(count=2).execute() !!! do not use fails with: Generated IDs are not supported for Docs Editors formats.
    res = driveSrv.files().copy(fileId=id, fields=drFldsAPI, body={'name':fn, 'parents': [pa]}).execute()
    print('copy result', fn, res)
    return drFiAtts(res)

def drCreate(pa, nm, mime=drMimeFolder):
    """ create a file in folder pa, with the give name and mimeType, withoud data """
    return drFiAtts(driveSrv.files().create(body={'name': nm, 'parents': [pa], 'mimeType': mime}, fields=drFldsAPI).execute())

def drUploadVers(fn, pa, mime, dt=None, nm=None):  
    """
        upload a new version of a file to googleDrive or if it does not exist yet, create the file
        set keepRevisionForever=True, otherwise the revision will soon disappear, which is not what we need here
        setting the modifiedTime, will set this as the upload time of the version and order the revisions by this upload time
    """

    if nm == None:
        nm = fn
    if dt != None:  #format RFC 3339: 2020-03-17T09:35:22.771Z
        if len(dt) == 10:
            dt += 'T00:00:00'
        if dt[-1] != 'Z':
                dt += 'Z'
    old = drListOne(pa, nm, True)
    media = MediaFileUpload(fn, mimetype=mime)
    if old == None:
        body = {'name': nm, 'parents': [pa]}
        if dt != None: 
            body['modifiedTime'] = dt
        new = driveSrv.files().create(keepRevisionForever=True, body=body, media_body=media, fields=drFldsAPI).execute()
    else:
        new = driveSrv.files().update(fileId=old['id'], keepRevisionForever=True, media_body=media
            , body= {} if dt == None else {'modifiedTime': dt}, fields=drFldsAPI).execute()
    drFiAtts(new)
    print('drUploadVers uploaded', 'file' if old == None else 'version', new)
    return new

def drMove(id, oldPa, newPa):   
    # print('drMove', id, 'in', oldPa, 'to', newPa)
    try:
        res = driveSrv.files().update(fileId=id, removeParents=oldPa, addParents=newPa, fields=drFldsAPI).execute()
    except Exception as e:
        print(f"***\n*** error drMove({id}, {oldPa}, {newPa})\n*** exception:", e)
        print('*** drget', id, drGet(id))
        raise e
    drFiAtts(res)
    if res['parent'] != newPa:
        print('drMove not in new parent', oldPa, 'to', newPa, 'res', res)        
    print('drMove', oldPa, 'to', newPa, 'res', res)
    return res

def drDownload(sid, fn=None):
    """ download a google drive file with id sid
        if fn == '' then return a StringIO (buffer in memory)
        else return write it to the given filename fn and return filename
    """  
    fh = BytesIO() if fn == None else open(fn, 'wb') 
    req = driveSrv.files().get_media(fileId=sid)
    downloader = MediaIoBaseDownload(fh, req)
    done = False
    while done is False:
        status, done = downloader.next_chunk()
        # print("Download %d%%." % int(status.progress() * 100))
    # print('drDownload', status)
    if fn == None:
        return StringIO(str(fh.getbuffer(), encoding='utf-8', errors='strict'))
    else:
        fh.close()
        return fn

def drChown(fi, us):
    """ change the owner of file with fileDict fi to user us """
    print('drChown changing', fi['path'], 'owner to', us)
    prms = driveSrv.permissions().list(fileId=fi['id'], fields='*').execute()['permissions']
    print('prms', prms)
    pN = [p for p in prms if 'emailAddress' in p and p['emailAddress'] == us]
    print('pN ***', len(pN), pN)
    try:
        if len(pN) >= 1:
            res = driveSrv.permissions().update(fileId=fi['id'], permissionId=pN[0]['id'], transferOwnership=True, body={'role': 'owner', 'transferOwnership': 'true', 'pendingOwner': 'true'}).execute()
        else:
            res = driveSrv.permissions().create(fileId=fi['id'], transferOwnership=True, body={'role': 'owner', 'type': 'user', 'pendingOwner': 'true', 'emailAddress': us}).execute()
    except BaseException as e:
        err('drChown for', fi['path'], 'update/create except', e, '\nfile', fi)
    # print(res)
    nn = drGet(fi['id'])
    # print('after', nn)
    if fi['name'] != nn['name'] or fi['id'] != nn['id'] :
        err('drChown mismatch fi', fi, '<==>', nn)
    elif us == nn['owner']:
        pass # print(fi['path'], fi['id'], 'owner from', fi['owner'], 'changedTo', nn['owner'], nn)
    else:
        err('could not chown fi', fi, 'to nn', nn, 'permissions', prms)
-------------------------- end py */
?>