opencl_device.cxx 22.5 KB
Newer Older
1 2 3 4 5 6 7 8 9 10
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */

#include <math.h>
11
#include <float.h>
12 13
#include <iostream>
#include <sstream>
Caolán McNamara's avatar
Caolán McNamara committed
14
#include <memory>
15
#include <vector>
16
#include <algorithm>
17 18 19

#include <comphelper/random.hxx>
#include <opencl/openclconfig.hxx>
20
#include <opencl/openclwrapper.hxx>
21 22
#include <opencl/platforminfo.hxx>
#include <sal/log.hxx>
23
#include <rtl/math.hxx>
24
#include <tools/time.hxx>
25

26 27
#include <opencl/OpenCLZone.hxx>

28
#include <opencl_device.hxx>
29

Kohei Yoshida's avatar
Kohei Yoshida committed
30 31
#define INPUTSIZE  15360
#define OUTPUTSIZE 15360
32 33 34

#define STRINGIFY(...) #__VA_ARGS__"\n"

35 36
namespace {

37 38 39 40 41 42 43
void DS_CHECK_STATUS(cl_int status, char const * name) {
    if (CL_SUCCESS != status)
    {
    SAL_INFO("opencl.device", "Error code is " << status << " at " << name);
    }
}

44 45 46
bool bIsDeviceSelected = false;
ds_device selectedDevice;

47
struct LibreOfficeDeviceEvaluationIO
48
{
49 50 51 52 53
    std::vector<double> input0;
    std::vector<double> input1;
    std::vector<double> input2;
    std::vector<double> input3;
    std::vector<double> output;
54 55
    unsigned long inputSize;
    unsigned long outputSize;
56
};
57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117

const char* source = STRINGIFY(
\n#if defined(KHR_DP_EXTENSION)
\n#pragma OPENCL EXTENSION cl_khr_fp64 : enable
\n#elif defined(AMD_DP_EXTENSION)
\n#pragma OPENCL EXTENSION cl_amd_fp64 : enable
\n#endif
    \n
    int isNan(fp_t a) { return a != a; }
    fp_t fsum(fp_t a, fp_t b) { return a + b; }

    fp_t fAverage(__global fp_t* input)
{
    fp_t sum = 0;
    int count = 0;
    for (int i = 0; i < INPUTSIZE; i++)
    {
        if (!isNan(input[i]))
        {
            sum = fsum(input[i], sum);
            count += 1;
        }
    }
    return sum / (fp_t)count;
}
    fp_t fMin(__global fp_t* input)
{
    fp_t min = MAXFLOAT;
    for (int i = 0; i < INPUTSIZE; i++)
    {
        if (!isNan(input[i]))
        {
            min = fmin(input[i], min);
        }
    }
    return min;
}
    fp_t fSoP(__global fp_t* input0, __global fp_t* input1)
{
    fp_t sop = 0.0;
    for (int i = 0; i < INPUTSIZE; i++)
    {
        sop += (isNan(input0[i]) ? 0 : input0[i]) * (isNan(input1[i]) ? 0 : input1[i]);
    }
    return sop;
}
    __kernel void DynamicKernel(
        __global fp_t* result, __global fp_t* input0, __global fp_t* input1, __global fp_t* input2, __global fp_t* input3)
{
    int gid0 = get_global_id(0);
    fp_t tmp0 = fAverage(input0);
    fp_t tmp1 = fMin(input1) * fSoP(input2, input3);
    result[gid0] = fsum(tmp0, tmp1);
}
    );

size_t sourceSize[] = { strlen(source) };

/* Random number generator */
double random(double min, double max)
{
118
    if (rtl::math::approxEqual(min, max))
119
        return min;
120
    return comphelper::rng::uniform_real_distribution(min, max);
121 122 123
}

/* Populate input */
124
void populateInput(std::unique_ptr<LibreOfficeDeviceEvaluationIO> const & testData)
125
{
126 127 128 129
    double* input0 = &testData->input0[0];
    double* input1 = &testData->input1[0];
    double* input2 = &testData->input2[0];
    double* input3 = &testData->input3[0];
130 131 132 133 134 135 136 137 138 139
    for (unsigned long i = 0; i < testData->inputSize; i++)
    {
        input0[i] = random(0, i);
        input1[i] = random(0, i);
        input2[i] = random(0, i);
        input3[i] = random(0, i);
    }
}

/* Evaluate devices */
140
ds_status evaluateScoreForDevice(ds_device& rDevice, std::unique_ptr<LibreOfficeDeviceEvaluationIO> const & testData)
141
{
142
    if (rDevice.eType == DeviceType::OpenCLDevice)
143 144
    {
        /* Evaluating an OpenCL device */
145
        SAL_INFO("opencl.device", "Device: \"" << rDevice.sDeviceName << "\" (OpenCL) evaluation...");
146
        cl_int clStatus;
147

148
        /* Check for 64-bit float extensions */
149 150 151 152 153 154 155 156 157 158 159 160
        std::unique_ptr<char[]> aExtInfo;
        {
            size_t aDevExtInfoSize = 0;

            OpenCLZone zone;
            clStatus = clGetDeviceInfo(rDevice.aDeviceID, CL_DEVICE_EXTENSIONS, 0, nullptr, &aDevExtInfoSize);
            DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clGetDeviceInfo");

            aExtInfo.reset(new char[aDevExtInfoSize]);
            clStatus = clGetDeviceInfo(rDevice.aDeviceID, CL_DEVICE_EXTENSIONS, sizeof(char) * aDevExtInfoSize, aExtInfo.get(), nullptr);
            DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clGetDeviceInfo");
        }
161 162 163

        bool bKhrFp64Flag = false;
        bool bAmdFp64Flag = false;
164
        const char* buildOption = nullptr;
165
        std::string tmpStr("-Dfp_t=double -Dfp_t4=double4 -Dfp_t16=double16 -DINPUTSIZE=");
166 167 168
        std::ostringstream tmpOStrStr;
        tmpOStrStr << std::dec << INPUTSIZE;
        tmpStr.append(tmpOStrStr.str());
169

Noel Grandin's avatar
Noel Grandin committed
170
        if ((std::string(aExtInfo.get())).find("cl_khr_fp64") != std::string::npos)
171 172 173 174 175
        {
            bKhrFp64Flag = true;
            //buildOption = "-D KHR_DP_EXTENSION -Dfp_t=double -Dfp_t4=double4 -Dfp_t16=double16";
            tmpStr.append(" -DKHR_DP_EXTENSION");
            buildOption = tmpStr.c_str();
176
            SAL_INFO("opencl.device", "... has cl_khr_fp64");
177
        }
Noel Grandin's avatar
Noel Grandin committed
178
        else if ((std::string(aExtInfo.get())).find("cl_amd_fp64") != std::string::npos)
179 180 181 182 183
        {
            bAmdFp64Flag = true;
            //buildOption = "-D AMD_DP_EXTENSION -Dfp_t=double -Dfp_t4=double4 -Dfp_t16=double16";
            tmpStr.append(" -DAMD_DP_EXTENSION");
            buildOption = tmpStr.c_str();
184
            SAL_INFO("opencl.device", "... has cl_amd_fp64");
185 186 187 188 189
        }

        if (!bKhrFp64Flag && !bAmdFp64Flag)
        {
            /* No 64-bit float support */
190 191
            rDevice.fTime = DBL_MAX;
            rDevice.bErrors = false;
192
            SAL_INFO("opencl.device", "... no fp64 support");
193 194 195 196 197
        }
        else
        {
            /* 64-bit float support present */

198 199
            OpenCLZone zone;

200
            /* Create context and command queue */
201
            cl_context  clContext = clCreateContext(nullptr, 1, &rDevice.aDeviceID, nullptr, nullptr, &clStatus);
202
            DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateContext");
203
            cl_command_queue clQueue = clCreateCommandQueue(clContext, rDevice.aDeviceID, 0, &clStatus);
204 205 206
            DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateCommandQueue");

            /* Build program */
207
            cl_program clProgram = clCreateProgramWithSource(clContext, 1, &source, sourceSize, &clStatus);
208
            DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateProgramWithSource");
209
            clStatus = clBuildProgram(clProgram, 1, &rDevice.aDeviceID, buildOption, nullptr, nullptr);
210 211 212 213 214 215
            DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clBuildProgram");
            if (CL_SUCCESS != clStatus)
            {
                /* Build program failed */
                size_t length;
                char* buildLog;
216
                clStatus = clGetProgramBuildInfo(clProgram, rDevice.aDeviceID, CL_PROGRAM_BUILD_LOG, 0, nullptr, &length);
217
                buildLog = static_cast<char*>(malloc(length));
218
                clGetProgramBuildInfo(clProgram, rDevice.aDeviceID, CL_PROGRAM_BUILD_LOG, length, buildLog, &length);
219
                SAL_INFO("opencl.device", "Build Errors:\n" << buildLog);
220 221
                free(buildLog);

222 223
                rDevice.fTime = DBL_MAX;
                rDevice.bErrors = true;
224 225 226 227
            }
            else
            {
                /* Build program succeeded */
228
                sal_uInt64 kernelTime = tools::Time::GetMonotonicTicks();
229 230

                /* Run kernel */
231
                cl_kernel clKernel = clCreateKernel(clProgram, "DynamicKernel", &clStatus);
232
                DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateKernel");
233
                cl_mem clResult = clCreateBuffer(clContext, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->outputSize, &testData->output[0], &clStatus);
234
                DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clResult");
235
                cl_mem clInput0 = clCreateBuffer(clContext, CL_MEM_READ_ONLY  | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->inputSize,  &testData->input0[0], &clStatus);
236
                DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clInput0");
237
                cl_mem clInput1 = clCreateBuffer(clContext, CL_MEM_READ_ONLY  | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->inputSize,  &testData->input1[0], &clStatus);
238
                DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clInput1");
239
                cl_mem clInput2 = clCreateBuffer(clContext, CL_MEM_READ_ONLY  | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->inputSize,  &testData->input2[0], &clStatus);
240
                DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clInput2");
241
                cl_mem clInput3 = clCreateBuffer(clContext, CL_MEM_READ_ONLY  | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->inputSize,  &testData->input3[0], &clStatus);
242
                DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clInput3");
243
                clStatus = clSetKernelArg(clKernel, 0, sizeof(cl_mem), static_cast<void*>(&clResult));
244
                DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clResult");
245
                clStatus = clSetKernelArg(clKernel, 1, sizeof(cl_mem), static_cast<void*>(&clInput0));
246
                DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clInput0");
247
                clStatus = clSetKernelArg(clKernel, 2, sizeof(cl_mem), static_cast<void*>(&clInput1));
248
                DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clInput1");
249
                clStatus = clSetKernelArg(clKernel, 3, sizeof(cl_mem), static_cast<void*>(&clInput2));
250
                DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clInput2");
251
                clStatus = clSetKernelArg(clKernel, 4, sizeof(cl_mem), static_cast<void*>(&clInput3));
252 253
                DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clInput3");
                size_t globalWS[1] = { testData->outputSize };
Noel Grandin's avatar
Noel Grandin committed
254
                size_t const localSize[1] = { 64 };
255
                clStatus = clEnqueueNDRangeKernel(clQueue, clKernel, 1, nullptr, globalWS, localSize, 0, nullptr, nullptr);
256 257 258 259 260 261 262 263 264
                DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clEnqueueNDRangeKernel");
                clFinish(clQueue);
                clReleaseMemObject(clInput3);
                clReleaseMemObject(clInput2);
                clReleaseMemObject(clInput1);
                clReleaseMemObject(clInput0);
                clReleaseMemObject(clResult);
                clReleaseKernel(clKernel);

265
                rDevice.fTime = tools::Time::GetMonotonicTicks() - kernelTime;
266
                rDevice.bErrors = false;
267 268 269 270 271 272 273 274 275 276
            }

            clReleaseProgram(clProgram);
            clReleaseCommandQueue(clQueue);
            clReleaseContext(clContext);
        }
    }
    else
    {
        /* Evaluating an Native CPU device */
277
        SAL_INFO("opencl.device", "Device: \"CPU\" (Native) evaluation...");
278
        sal_uInt64 kernelTime = tools::Time::GetMonotonicTicks();
279

280 281
        unsigned long j;
        for (j = 0; j < testData->outputSize; j++)
282 283 284 285 286 287 288
        {
            double fAverage = 0.0f;
            double fMin = DBL_MAX;
            double fSoP = 0.0f;
            for (unsigned long i = 0; i < testData->inputSize; i++)
            {
                fAverage += testData->input0[i];
289
                fMin = std::min(fMin, testData->input1[i]);
290 291 292 293
                fSoP += testData->input2[i] * testData->input3[i];
            }
            fAverage /= testData->inputSize;
            testData->output[j] = fAverage + (fMin * fSoP);
294 295 296
            // Don't run for much longer than one second
            if (j > 0 && j % 100 == 0)
            {
297
                rDevice.fTime = tools::Time::GetMonotonicTicks() - kernelTime;
298 299 300
                if (rDevice.fTime >= 1)
                    break;
            }
301 302
        }

303
        rDevice.fTime = tools::Time::GetMonotonicTicks() - kernelTime;
304 305

        // Scale time to how long it would have taken to go all the way to outputSize
306
        rDevice.fTime /= (static_cast<double>(j) / testData->outputSize);
307

308 309 310 311
        // InterpretTail - the S/W fallback is nothing like as efficient
        // as any good openCL implementation: no SIMD, tons of branching
        // in the inner loops etc. Generously characterise it as only 10x
        // slower than the above.
312
        rDevice.fTime *= 10.0;
313
        rDevice.bErrors = false;
314 315 316 317
    }
    return DS_SUCCESS;
}

318
ds_status profileDevices(std::unique_ptr<ds_profile> const & pProfile, std::unique_ptr<LibreOfficeDeviceEvaluationIO> const & pTestData)
319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336
{
    ds_status status = DS_SUCCESS;

    if (!pProfile)
        return DS_INVALID_PROFILE;

    for (ds_device& rDevice : pProfile->devices)
    {
        ds_status evaluatorStatus = evaluateScoreForDevice(rDevice, pTestData);
        if (evaluatorStatus != DS_SUCCESS)
        {
            status = evaluatorStatus;
            return status;
        }
    }
    return status;
}

337
/* Pick best device */
338
ds_status pickBestDevice(std::unique_ptr<ds_profile> const & profile, int& rBestDeviceIndex)
339 340 341
{
    double bestScore = DBL_MAX;

342 343
    rBestDeviceIndex = -1;

344 345
    for (std::vector<ds_device>::size_type d = 0; d < profile->devices.size();
         d++)
346
    {
347
        ds_device& device = profile->devices[d];
348

349
        // Check blacklist and whitelist for actual devices
350
        if (device.eType == DeviceType::OpenCLDevice)
351 352 353 354 355 356 357 358 359
        {
            // There is a silly impedance mismatch here. Why do we
            // need two different ways to describe an OpenCL platform
            // and an OpenCL device driver?

            OpenCLPlatformInfo aPlatform;
            OpenCLDeviceInfo aDevice;

            // We know that only the below fields are used by checkForKnownBadCompilers()
360 361 362
            aPlatform.maVendor = OStringToOUString(device.sPlatformVendor, RTL_TEXTENCODING_UTF8);
            aDevice.maName = OStringToOUString(device.sDeviceName, RTL_TEXTENCODING_UTF8);
            aDevice.maDriver = OStringToOUString(device.sDriverVersion, RTL_TEXTENCODING_UTF8);
363 364

            // If blacklisted or not whitelisted, ignore it
365
            if (OpenCLConfig::get().checkImplementation(aPlatform, aDevice))
366
            {
367 368 369
                SAL_INFO("opencl.device", "Device[" << d << "] " << device.sDeviceName << " is blacklisted or not whitelisted");
                device.fTime = DBL_MAX;
                device.bErrors = false;
370 371 372
            }
        }

Michael Meeks's avatar
Michael Meeks committed
373
        double fScore = DBL_MAX;
374 375
        if (device.fTime >= 0.0
            || rtl::math::approxEqual(device.fTime, DBL_MAX))
376
        {
377
            fScore = device.fTime;
378
        }
379
        else
David Tardon's avatar
David Tardon committed
380
        {
381
            SAL_INFO("opencl.device", "Unusual null score");
David Tardon's avatar
David Tardon committed
382
        }
383

384
        if (device.eType == DeviceType::OpenCLDevice)
385
        {
386
            SAL_INFO("opencl.device", "Device[" << d << "] " << device.sDeviceName << " (OpenCL) score is " << fScore);
387 388 389
        }
        else
        {
390
            SAL_INFO("opencl.device", "Device[" << d << "] CPU (Native) score is " << fScore);
391
        }
392
        if (fScore < bestScore)
393
        {
394
            bestScore = fScore;
395
            rBestDeviceIndex = d;
396 397
        }
    }
398
    if (rBestDeviceIndex != -1 && profile->devices[rBestDeviceIndex].eType == DeviceType::OpenCLDevice)
399
    {
400
        SAL_INFO("opencl.device", "Selected Device[" << rBestDeviceIndex << "]: " << profile->devices[rBestDeviceIndex].sDeviceName << "(OpenCL).");
401 402 403
    }
    else
    {
404
        SAL_INFO("opencl.device", "Selected Device[" << rBestDeviceIndex << "]: CPU (Native).");
405 406 407 408 409
    }
    return DS_SUCCESS;
}

/* Return device ID for matching device name */
410
int matchDevice(std::unique_ptr<ds_profile> const & profile, char* deviceName)
411 412
{
    int deviceMatch = -1;
413
    for (size_t d = 0; d < profile->devices.size() - 1; d++)
414
    {
415
        if (profile->devices[d].sDeviceName.indexOf(deviceName) != -1)
416
            deviceMatch = d;
417
    }
418 419
    if (std::string("NATIVE_CPU").find(deviceName) != std::string::npos)
        deviceMatch = profile->devices.size() - 1;
420 421 422
    return deviceMatch;
}

423 424 425 426 427
class LogWriter
{
private:
    SvFileStream maStream;
public:
428
    explicit LogWriter(OUString const & aFileName)
429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457
        : maStream(aFileName, StreamMode::WRITE)
    {}

    void text(const OString& rText)
    {
        maStream.WriteOString(rText);
        maStream.WriteChar('\n');
    }

    void log(const OString& rKey, const OString& rValue)
    {
        maStream.WriteOString(rKey);
        maStream.WriteCharPtr(": ");
        maStream.WriteOString(rValue);
        maStream.WriteChar('\n');
    }

    void log(const OString& rKey, int rValue)
    {
        log(rKey, OString::number(rValue));
    }

    void log(const OString& rKey, bool rValue)
    {
        log(rKey, OString::boolean(rValue));
    }
};


458
void writeDevicesLog(std::unique_ptr<ds_profile> const & rProfile, OUString const & sProfilePath, int nSelectedIndex)
459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495
{
    OUString aCacheFile(sProfilePath + "opencl_devices.log");
    LogWriter aWriter(aCacheFile);

    int nIndex = 0;

    for (ds_device& rDevice : rProfile->devices)
    {
        if (rDevice.eType == DeviceType::OpenCLDevice)
        {
            aWriter.log("Device Index", nIndex);
            aWriter.log("  Selected", nIndex == nSelectedIndex);
            aWriter.log("  Device Name", rDevice.sDeviceName);
            aWriter.log("  Device Vendor", rDevice.sDeviceVendor);
            aWriter.log("  Device Version", rDevice.sDeviceVersion);
            aWriter.log("  Driver Version", rDevice.sDriverVersion);
            aWriter.log("  Device Type", rDevice.sDeviceType);
            aWriter.log("  Device Extensions", rDevice.sDeviceExtensions);
            aWriter.log("  Device OpenCL C Version", rDevice.sDeviceOpenCLVersion);

            aWriter.log("  Device Available", rDevice.bDeviceAvailable);
            aWriter.log("  Device Compiler Available", rDevice.bDeviceCompilerAvailable);
            aWriter.log("  Device Linker Available", rDevice.bDeviceLinkerAvailable);

            aWriter.log("  Platform Name", rDevice.sPlatformName);
            aWriter.log("  Platform Vendor", rDevice.sPlatformVendor);
            aWriter.log("  Platform Version", rDevice.sPlatformVersion);
            aWriter.log("  Platform Profile", rDevice.sPlatformProfile);
            aWriter.log("  Platform Extensions", rDevice.sPlatformExtensions);
            aWriter.text("");
        }
        nIndex++;
    }
}

} // end anonymous namespace

496
ds_device const & getDeviceSelection(
497
    OUString const & sProfilePath, bool bForceSelection)
498 499 500 501 502
{
    /* Run only if device is not yet selected */
    if (!bIsDeviceSelected || bForceSelection)
    {
        /* Setup */
503 504 505
        std::unique_ptr<ds_profile> aProfile;
        ds_status status;
        status = initDSProfile(aProfile, "LibreOffice v1");
506

507
        if (status != DS_SUCCESS)
508 509
        {
            // failed to initialize profile.
510
            selectedDevice.eType = DeviceType::NativeCPU;
511 512 513
            return selectedDevice;
        }

514
        /* Try reading scores from file */
515
        OUString sFilePath = sProfilePath + "opencl_profile.xml";
516

517 518
        if (!bForceSelection)
        {
519
            status = readProfile(sFilePath, aProfile);
520 521 522 523
        }
        else
        {
            status = DS_INVALID_PROFILE;
524
            SAL_INFO("opencl.device", "Performing forced profiling.");
525 526 527
        }
        if (DS_SUCCESS != status)
        {
Michael Meeks's avatar
Michael Meeks committed
528 529
            if (!bForceSelection)
            {
530
                SAL_INFO("opencl.device", "Profile file not available (" << sFilePath << "); performing profiling.");
Michael Meeks's avatar
Michael Meeks committed
531
            }
532 533

            /* Populate input data for micro-benchmark */
Caolán McNamara's avatar
Caolán McNamara committed
534
            std::unique_ptr<LibreOfficeDeviceEvaluationIO> testData(new LibreOfficeDeviceEvaluationIO);
535 536
            testData->inputSize  = INPUTSIZE;
            testData->outputSize = OUTPUTSIZE;
537 538 539 540 541
            testData->input0.resize(testData->inputSize);
            testData->input1.resize(testData->inputSize);
            testData->input2.resize(testData->inputSize);
            testData->input3.resize(testData->inputSize);
            testData->output.resize(testData->outputSize);
542
            populateInput(testData);
543 544

            /* Perform evaluations */
545
            status = profileDevices(aProfile, testData);
546

547 548 549
            if (DS_SUCCESS == status)
            {
                /* Write scores to file */
550
                status = writeProfile(sFilePath, aProfile);
551 552
                if (DS_SUCCESS == status)
                {
553
                    SAL_INFO("opencl.device", "Scores written to file (" << sFilePath << ").");
554 555 556
                }
                else
                {
557
                    SAL_INFO("opencl.device", "Error saving scores to file (" << sFilePath << "); scores not written to file.");
558 559 560 561
                }
            }
            else
            {
562
                SAL_INFO("opencl.device", "Unable to evaluate performance; scores not written to file.");
563 564 565 566
            }
        }
        else
        {
567
            SAL_INFO("opencl.device", "Profile read from file (" << sFilePath << ").");
568 569 570 571
        }

        /* Pick best device */
        int bestDeviceIdx;
572
        pickBestDevice(aProfile, bestDeviceIdx);
573

574
        /* Override if necessary */
575
        char* overrideDeviceStr = getenv("SC_OPENCL_DEVICE_OVERRIDE");
576
        if (nullptr != overrideDeviceStr)
577
        {
578
            int overrideDeviceIdx = matchDevice(aProfile, overrideDeviceStr);
579 580
            if (-1 != overrideDeviceIdx)
            {
581
                SAL_INFO("opencl.device", "Overriding Device Selection (SC_OPENCL_DEVICE_OVERRIDE=" << overrideDeviceStr << ").");
582
                bestDeviceIdx = overrideDeviceIdx;
583
                if (aProfile->devices[bestDeviceIdx].eType == DeviceType::OpenCLDevice)
584
                {
585
                    SAL_INFO("opencl.device", "Selected Device[" << bestDeviceIdx << "]: " << aProfile->devices[bestDeviceIdx].sDeviceName << " (OpenCL).");
586 587 588
                }
                else
                {
589
                    SAL_INFO("opencl.device", "Selected Device[" << bestDeviceIdx << "]: CPU (Native).");
590 591 592 593
                }
            }
            else
            {
594
                SAL_INFO("opencl.device", "Ignoring invalid SC_OPENCL_DEVICE_OVERRIDE=" << overrideDeviceStr << ").");
595 596 597 598
            }
        }

        /* Final device selection */
599 600 601 602
        if (bestDeviceIdx >=0 && static_cast< std::vector<ds_device>::size_type> ( bestDeviceIdx ) < aProfile->devices.size() )
        {
            selectedDevice = aProfile->devices[bestDeviceIdx];
            bIsDeviceSelected = true;
603

604 605 606 607
            writeDevicesLog(aProfile, sProfilePath, bestDeviceIdx);
        } else {
            selectedDevice.eType = DeviceType::NativeCPU;
        }
608 609 610 611 612
    }
    return selectedDevice;
}

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */